In [1]:
import pandas as pd
import numpy as np

In [2]:
def keep_nth_subburst(cat):
    """
    keep only the last (nth) sub-burst row of each burst

    the catalog 1 analysis was done with the nth sub-burst for each source
    the public version of catalog 1 has all the sub-bursts as separate rows, so we first
    keep only the final row of every run of sub-bursts

    logistically, we look at the `sub_num` column, whose ints restart at 0 for every
    burst and count up through its sub-bursts. a row is the last sub-burst of its burst
    exactly when the *next* row has `sub_num == 0`, or when it is the last row of the
    catalog (which has no next row and therefore always closes a burst).

    parameters
    ----------
    cat : pandas.DataFrame with a `sub_num` column; the rows of one burst are assumed to
          be contiguous and in increasing `sub_num` order

    returns
    -------
    pandas.DataFrame holding the selected rows of `cat`, in their original order
    """
    sub_nums = cat['sub_num'].values

    # start from "every row is a last sub-burst", then overwrite all but the final row
    # with the real test (does the next row start a new burst?). the final row stays
    # True regardless of what the first row's sub_num is; an empty catalog gives an
    # empty mask because both slices below are then empty.
    is_last = np.ones(len(sub_nums), dtype=bool)
    is_last[:-1] = sub_nums[1:] == 0

    return cat.iloc[np.flatnonzero(is_last)]

def cut_cat_exposure(cat):
    """
    drop the rows that were excluded because of exposure issues
    (42 sub-bursts in catalog 1)

    excluded rows carry `excluded_flag = 1`; everything with
    `excluded_flag = 0` is returned unchanged and in the original order
    """
    not_excluded = cat['excluded_flag'].eq(0)
    return cat.loc[not_excluded]

def cut_far_sidelobe(cat):
    """
    drop the 3 sub-bursts that are far-sidelobe detections

    they are identified by their TNS names:
    'FRB20190125B', 'FRB20190202B', 'FRB20190210D'
    """
    far_sidelobe_names = ['FRB20190125B', 'FRB20190202B', 'FRB20190210D']
    is_far_sidelobe = cat['tns_name'].isin(far_sidelobe_names)
    return cat[~is_far_sidelobe]

def cut_near_amb(cat):
    """
    drop sub-bursts whose observed DM (`dm_fitb`) is less than 1.5x the MW contribution

    the MW contribution is recovered as `dm_fitb` minus the excess DM of each model
    (`dm_exc_ne2001`, `dm_exc_ymw16`); the larger, i.e. more conservative, of the two
    is used row by row. rows sitting exactly at 1.5x are kept.
    """
    observed_dm = cat['dm_fitb']
    mw_ne2001 = observed_dm - cat['dm_exc_ne2001']
    mw_ymw16 = observed_dm - cat['dm_exc_ymw16']

    mw_conservative = np.maximum(mw_ne2001, mw_ymw16)
    clear_of_mw = observed_dm >= 1.5 * mw_conservative

    return cat[clear_of_mw]

def cut_low_snr(cat):
    """
    drop sub-bursts whose signal-to-noise (`bonsai_snr`) is below 12;
    rows with a value of exactly 12 are kept
    """
    bright_enough = cat['bonsai_snr'].ge(12)
    return cat.loc[bright_enough]

def cut_high_scat(cat):
    """
    drop sub-bursts whose scattering timescale is >= 10 ms (10e-3 in the units of
    the `scat_time` column), i.e. keep only rows with `scat_time` strictly below it

    `scat_time` entries are either a plain number or an upper limit written with a
    leading '<' (e.g. '<0.001'). an upper limit is compared using the limit itself,
    so it is kept only when the limit is below 10 ms.

    the column may hold strings or already-parsed numbers (pandas reads it as numeric
    when no upper limits are present); both are handled. missing values compare as
    NaN and are therefore dropped.

    parameters
    ----------
    cat : pandas.DataFrame with a `scat_time` column

    returns
    -------
    pandas.DataFrame holding the rows of `cat` that pass the cut, in original order

    raises
    ------
    ValueError if an entry cannot be parsed as a float after removing the '<'
    """
    max_scat_time = 10e-3  # 10 ms threshold

    # normalise to text so the '<' marker can be stripped uniformly, then parse
    scat_times = (
        cat['scat_time']
        .astype(str)
        .str.strip()
        .str.lstrip('<')
        .astype(float)
    )

    # plain numpy mask: selects by position, independent of the frame's index labels
    low_scat = (scat_times < max_scat_time).values
    return cat[low_scat]

def cut_repeat_bursts(cat):
    """
    keep every non-repeater row and only the first row of each repeater

    `mjd_400` is the time of arrival with reference to ~400 MHz for the specific
    sub-burst; we assert that the catalog is sorted in time, so the first row of a
    repeater in the table is also its earliest burst.

    rows with `repeater_name == '-9999'` are non-repeaters and are always kept.
    for every other `repeater_name`, only the first occurrence is kept.

    parameters
    ----------
    cat : pandas.DataFrame with `mjd_400` and `repeater_name` columns

    returns
    -------
    pandas.DataFrame holding the surviving rows of `cat`, in their original order

    raises
    ------
    AssertionError if `mjd_400` is not in non-decreasing order
    """
    arrival_mjds = cat['mjd_400'].values
    assert np.all(arrival_mjds[:-1] <= arrival_mjds[1:])

    names = cat['repeater_name']
    is_repeater = (names != '-9999').values
    # True for the 2nd, 3rd, ... occurrence of a name; the first occurrence is False
    is_later_occurrence = names.duplicated(keep='first').values

    # boolean mask instead of integer index arrays: stays well-defined when there are
    # no repeaters (or no non-repeaters) in the catalog
    return cat[~(is_repeater & is_later_occurrence)]

In [3]:
# load the catalog 1 table from `chimefrbcat1.csv` and apply the selection cuts one
# at a time, printing how many rows survive each step
cat1_og = pd.read_csv('chimefrbcat1.csv')
print(f"the original catalog has {len(cat1_og)} (sub-)bursts")

# collapse multi-component bursts to a single row each (see keep_nth_subburst);
# `cat1_og` and `cat1_og2` are left untouched by the cuts below, which rebind `cat1`
cat1_og2 = keep_nth_subburst(cat1_og)
print(f"after keeping only the nth sub-burst of each source, the original catalog has {len(cat1_og2)} bursts")

# rows with excluded_flag != 0 are removed
cat1 = cut_cat_exposure(cat1_og2)
print(f"after exposure issue cuts, it has {len(cat1)} bursts")

# three named far-sidelobe detections are removed
cat1 = cut_far_sidelobe(cat1)
print(f"after far sidelobe cuts, it has {len(cat1)} bursts")

# rows with dm_fitb below 1.5x the larger MW DM estimate are removed
cat1 = cut_near_amb(cat1)
print(f"after near_AMB cuts, it has {len(cat1)} bursts")

# rows with bonsai_snr < 12 are removed
cat1 = cut_low_snr(cat1)
print(f"after low SNR cuts, it has {len(cat1)} bursts")

# sanity check: at this point, all remaining sources have DM (`dm_fitb`) > 100
assert np.min(cat1['dm_fitb']) > 100

# only rows with scat_time below 10e-3 are kept
cat1 = cut_high_scat(cat1)
print(f"after high scattering cuts, it has {len(cat1)} bursts")

# only the first row of each repeater is kept; non-repeaters are unaffected
cat1 = cut_repeat_bursts(cat1)
print(f"after repeat burst cuts, it has {len(cat1)} bursts")


the original catalog has 600 (sub-)bursts
after keeping only the nth sub-burst of each source, the original catalog has 536 bursts
after exposure issue cuts, it has 497 bursts
after far sidelobe cuts, it has 494 bursts
after near_AMB cuts, it has 470 bursts
after low SNR cuts, it has 298 bursts
after high scattering cuts, it has 270 bursts
after repeat burst cuts, it has 265 bursts
