In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec

import cfod
from cfod import catalog
from cfod.routines import waterfaller

import h5py
import scipy
import wget

#Goal:

- find first burst repeaters
- streamline getting the graphs out
- find way of getting the data files (not manually) -> wget usage maybe

In [2]:
data_catalog = catalog.as_dataframe()
data_catalog

Unnamed: 0,tns_name,previous_name,repeater_name,ra,ra_err,ra_notes,dec,dec_err,dec_notes,gl,...,width_fitb,width_fitb_err,sp_idx,sp_idx_err,sp_run,sp_run_err,high_freq,low_freq,peak_freq,excluded_flag
0,FRB20180725A,180725.J0613+67,-9999,93.42,0.039,-9999,67.07,0.210,-9999,147.29,...,0.000296,0.000076,38.20,3.70,-45.80,4.20,760.1,485.3,607.4,1
1,FRB20180727A,180727.J1311+26,-9999,197.72,0.130,-9999,26.42,0.260,-9999,24.76,...,0.00139,0.000170,3.80,1.80,-9.20,3.00,800.2,400.2,493.3,1
2,FRB20180729A,180729.J1316+55,-9999,199.40,0.120,-9999,55.58,0.084,-9999,115.26,...,<0.00010,-9999.000000,16.46,0.24,-30.21,0.38,692.7,400.2,525.6,1
3,FRB20180729B,180729.J0558+56,-9999,89.93,0.270,-9999,56.50,0.240,-9999,156.90,...,0.000314,0.000083,14.50,3.50,-14.60,3.50,800.2,441.8,657.5,1
4,FRB20180730A,180730.J0353+87,-9999,57.39,0.032,-9999,87.19,0.200,-9999,125.11,...,0.000468,0.000040,4.27,0.30,-11.31,0.48,759.2,400.2,483.5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,FRB20190701A,-9999,-9999,277.47,0.210,-9999,59.04,0.220,-9999,88.29,...,0.000608,0.000057,-1.10,1.50,3.30,1.90,800.2,400.2,800.2,0
596,FRB20190701B,-9999,-9999,302.93,0.220,-9999,80.18,0.240,-9999,112.88,...,0.00063,0.000130,3.90,1.70,-11.80,3.10,732.8,400.2,471.5,0
597,FRB20190701C,-9999,-9999,96.36,0.230,-9999,81.63,0.270,-9999,132.18,...,0.00144,0.000160,46.20,9.00,-211.00,41.00,495.5,402.2,446.4,0
598,FRB20190701D,-9999,-9999,112.10,0.180,-9999,66.70,0.160,-9999,149.28,...,0.00140,0.000120,6.49,0.75,-20.90,1.60,651.8,400.2,467.6,0


In [3]:
def boxcar_kernel(width):
    width = int(round(width, 0))
    return np.ones(width, dtype="float32") / np.sqrt(width)


def find_burst(ts, min_width=1, max_width=128):
    min_width = int(min_width)
    max_width = int(max_width)
    # do not search widths bigger than timeseries
    widths = list(range(min_width, min(max_width + 1, len(ts)-2)))
    # envelope finding
    snrs = np.empty_like(widths, dtype=float)
    peaks = np.empty_like(widths, dtype=int)
    for i in range(len(widths)):
        convolved = scipy.signal.convolve(ts, boxcar_kernel(widths[i]), mode="same")
        peaks[i] = np.nanargmax(convolved)
        snrs[i] = convolved[peaks[i]]
    best_idx = np.nanargmax(snrs)
    return peaks[best_idx], widths[best_idx], snrs[best_idx]

def bin_freq_channels(data, fbin_factor=4):
    num_chan = data.shape[0]
    if num_chan % fbin_factor != 0:
        raise ValueError("frequency binning factor `fbin_factor` should be even")
    data = np.nanmean(data.reshape((num_chan // fbin_factor, fbin_factor) + data.shape[1:]), axis=1)
    return data

In [14]:
#find the download url and import the data for a burst given its tns_name from the Data table.
def get_data(burst_index_number):
    example_tns = data_catalog["tns_name"][burst_index_number]
    url_base = "https://ws.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/files/vault/AstroDataCitationDOI/CISTI.CANFAR/21.0007/data/waterfalls/data/"
    waterfall_string = '_waterfall.h5'
    url = url_base + example_tns +waterfall_string
    
    #statement implemented so in testing every run does not create duplicates of the same file (saving runtime and storage)
    try:
        Data_from_source = example_tns + waterfall_string
        data = h5py.File(Data_from_source, "r")
        print("file from folder")

        
    except:
        print('file not in folder, downloading')
        Data_from_source = wget.download(url)
        data = h5py.File(Data_from_source, "r")
     
    return data

def make_curves(data):
    data = data["frb"]
    eventname = data.attrs["tns_name"].decode()
    wfall = data["wfall"][:]
    model_wfall = data["model_wfall"][:]
    plot_time = data["plot_time"][:]
    plot_freq = data["plot_freq"][:]
    ts = data["ts"][:]
    model_ts = data["model_ts"][:]
    spec = data["spec"][:]
    model_spec = data["model_spec"][:]
    extent = data["extent"][:]
    dm = data.attrs["dm"][()]
    scatterfit = data.attrs["scatterfit"][()]
    cal_obs_date = data.attrs["calibration_observation_date"].decode()
    cal_source_name = data.attrs["calibration_source_name"].decode()
    cal_wfall =  data["calibrated_wfall"][:]

    dt = np.median(np.diff(plot_time)) # the delta (time) between time bins 
    # dt in mu s
    # this value is the same for both caliberated and uncalibrated data
    ts_with_RFI = ts

    q1 = np.nanquantile(spec, 0.25)
    q3 = np.nanquantile(spec, 0.75)
    iqr = q3 - q1

    # additional masking of channels with RFI
    rfi_masking_var_factor = 3

    channel_variance = np.nanvar(wfall, axis=1)
    mean_channel_variance = np.nanmean(channel_variance)

    with np.errstate(invalid="ignore"):
        rfi_mask = (channel_variance > \
                    rfi_masking_var_factor * mean_channel_variance) \
                    | (spec[::-1] < q1 - 1.5 * iqr) | (spec[::-1] > q3 + 1.5 * iqr)
    wfall[rfi_mask,...] = np.nan
    model_wfall[rfi_mask,...] = np.nan
    spec[rfi_mask[::-1]] = np.nan

    # -------------- start plotting ------------
    # remake time-series after RFI masking
    ts = np.nansum(wfall, axis=0)
    model_ts = np.nansum(model_wfall, axis=0)


    peak, width, snr = find_burst(ts)
    print(f"Peak: {peak} at time sample, Width = {width*dt} ms, SNR = {snr}")

    # bin frequency channels such that we have 16,384/16 = 1024 frequency channels 
    #wfall = bin_freq_channels(wfall, 16)
    

    ### time stamps relative to the peak
    peak_idx = np.argmax(ts)
    plot_time -= plot_time[peak_idx]

    # prepare time-series for histogramming
    plot_time -= dt / 2.
    plot_time = np.append(plot_time, plot_time[-1] + dt)


    ### plot dynamic spectrum
    #wfall[np.isnan(wfall)] = np.nanmedian(wfall)   # replace nans in the data with the data median


#     plt.figure(figsize =(12,8))
#     ### plot time-series
#     plt.plot(plot_time, np.append(ts, ts[-1]), color="tab:gray",
#                     drawstyle="steps-post", label='data')




    cmap = plt.cm.viridis

    ### plot model time-series and spectrum
#     if scatterfit:
#         plt.plot(plot_time, np.append(model_ts, model_ts[-1]),
#                         color=cmap(0.25), drawstyle="steps-post", lw=2, label="model")
#     else:
    plt.plot(plot_time, np.append(model_ts, model_ts[-1]),
#                         color=cmap(0.5), drawstyle="steps-post", lw=1, label="model")


    # also do so for the calibrated data
    cal_wfall[np.isnan(cal_wfall)] = np.nanmedian(cal_wfall)   # replace nans in the data with the data median
    #bin frequency channels such that we have 16,384/16 = 1024 frequency channels 
    cal_wfall = bin_freq_channels(cal_wfall,16) 
    
    cal_ts = np.nanmean(cal_wfall, axis = 0)
    times = np.arange(len(cal_ts))*dt
    peak_idx = np.argmax(cal_ts)
    times -= times[peak_idx]
    times -= dt / 2.
    
    #make calibrated signal less braod
    deff_index_min = -int(times[0] - plot_time[0])
    len(plot_time)
    times_shorter = times[deff_index_min: (deff_index_min +len(plot_time))]
    cal_ts_shorter = cal_ts[deff_index_min: (deff_index_min +len(plot_time))]
    
#     plt.plot(times_shorter, 120*cal_ts_shorter, drawstyle="steps-post", label="cal*120")

#     plt.legend(loc="upper left")

    #plt.xlim(-10,15)
    plt.imshow
    
    return plot_time, np.append(ts, ts[-1]), np.append(model_ts, model_ts[-1]),times_shorter, cal_ts_shorter


SyntaxError: invalid syntax (<ipython-input-14-c5bc06ecb775>, line 111)

In [15]:
def Get_me_FRB_data(burst_index_number):
    data= get_data(burst_index_number =burst_index_number)
    plot_time, ts_full_list, model_ts_full_list, times_shorter, cal_ts_shorter = make_curves(data)
    return plot_time, ts_full_list, model_ts_full_list, times_shorter, cal_ts_shorter

In [16]:
Get_me_FRB_data(11)

file from folder
Peak: 14 at time sample, Width = 4.915199970128015 ms, SNR = 279.1211697946155


  channel_variance = np.nanvar(wfall, axis=1)


(array([-12.28799993, -11.30495993, -10.32191994,  -9.33887994,
         -8.35583995,  -7.37279996,  -6.38975996,  -5.40671997,
         -4.42367997,  -3.44063998,  -2.45759999,  -1.47455999,
         -0.49152   ,   0.49152   ,   1.47455999,   2.45759999,
          3.44063998,   4.42367997,   5.40671997,   6.38975996,
          7.37279996,   8.35583995,   9.33887994,  10.32191994,
         11.30495993,  12.28799993,  13.27103992,  14.25407991,
         15.23711991,  16.2201599 ,  17.2031999 ,  18.18623989,
         19.16927988,  20.15231988,  21.13535987,  22.11839987,
         23.10143986,  24.08447985,  25.06751985]),
 array([ 1.34357119e+01, -3.49527096e+01,  8.02105844e+00, -8.38086623e+00,
        -2.74600269e+01,  1.39898485e+00,  1.06364254e+01, -1.27468101e+01,
        -1.04577683e+01, -6.17238313e+00, -9.39773977e-01,  5.91150551e+01,
         2.58192671e+02,  1.04883690e+02,  7.95916438e+01,  1.05801411e+02,
         7.56645014e+01, -4.39592654e+00,  7.66707110e+00, -4.043212