In [1]:
# Import functionalities
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%load_ext nb_black

<IPython.core.display.Javascript object>

Below, the file containing the candidate properties is read in and the burst times are sorted into an array. This is specified for our file structure and burst fitting procedure, and will need to be changed. <br>
We avoid counting the difference between separate observations as a real wait time by specifying a maximum time difference to consider as real. Here, we use a constant value of 7,000 seconds derived from the length of our observations. <br>
The function assumes that the burst times are in MJD.

In [2]:
# Pull in pulse times
data = pd.read_csv("/home/elewis/FRB121102/all_bursts_bary.csv")
data = data[(data["fit_method"] == "mcmc")]  # excise curvefit bursts
print(str(len(data)) + " good MCMC bursts")

# Break into individual arrays
mjd = data["$\mu_t$ (ms)"]
mjd = mjd.sort_values()

width = data["$\sigma_t$ (ms)"]
width = width.sort_values()

min_diff = 0.001  # Minimum difference between pulse times to consider a real wait time
max_diff = 7000.0  # Maximum difference ^

tols = np.arange(1.0, 51.0, 1.0)

130 good MCMC bursts


<IPython.core.display.Javascript object>

In [3]:
# Tunable period ranges
pers1 = np.arange(0.001, 0.1, 0.0001)
pers2 = np.arange(0.1, 10.0, 0.01)
pers3 = np.arange(10.0, 100.0, 0.1)
search_periods = np.hstack((pers1, pers2, pers3))
print("Searching " + str(len(search_periods)) + " candidate periods")

Searching 2880 candidate periods


<IPython.core.display.Javascript object>

The function below determines how many pulses each candidate period matches. It sorts the results by the most matches.

In [4]:
def get_pcands(
    pulse_times, search_periods, min_diff, max_diff, tolerance, mjd_flag=None
):
    diffs = []
    pulse_times = np.sort(pulse_times)

    # Calculate differences between pulse times
    for i in range(0, len(pulse_times) - 1):
        if mjd_flag == True:
            diff = (pulse_times[i + 1] - pulse_times[i]) * 86400.0
        else:
            diff = pulse_times[i + 1] - pulse_times[i]

        if diff < max_diff and diff > min_diff:
            diffs.append(diff)

    phase_tol = tolerance * 0.01  # tolerance is given in percentages
    npulse_matches = np.zeros(len(search_periods))

    for i in range(0, len(search_periods)):  # for each search period
        diff_matches = []
        for j in range(0, len(diffs)):  # for each difference between pulse times
            phase = (diffs[j] / search_periods[i]) - int(diffs[j] / search_periods[i])
            if phase < (0.5 * phase_tol) or phase > (
                1.0 - 0.5 * phase_tol
            ):  # if difference is within tolerance of this search period
                diff_matches.append(
                    j
                )  # this difference j between two pulse times matches this period i

        # after iterating through all diffs, see how many pulses this period matches
        npulse_matches[i] = len(diff_matches) + 1

    search_periods = pd.Series(search_periods)
    npulse_matches = pd.Series(npulse_matches)
    datadict = {"period": search_periods, "N matches": npulse_matches}
    dataframe = pd.concat(datadict, axis=1)

    # find the periods that matched the most pulses
    best_by_matches = dataframe.sort_values(by="N matches", ascending=False)
    most_matches = best_by_matches.iloc[
        0, 1
    ]  # max number of matches of any candidate period
    mm_periods = best_by_matches[
        (best_by_matches["N matches"] == most_matches)
    ]  # periods with this N matches
    return most_matches, mm_periods, best_by_matches

<IPython.core.display.Javascript object>

First, we run this analysis using *all* of the differences, regardless of which observation they come from.

In [5]:
# Searching all diffs
all_pulses = np.array(mjd)
all_top_periods = []

for tol in tols:
    most_matches, mm_periods, dataframe = get_pcands(
        all_pulses, search_periods, min_diff, max_diff, tol, True
    )
    if tol == 50.0:
        print(
            "At 50% tolerance, best period matches "
            + str(int(most_matches))
            + "/"
            + str(len(all_pulses))
            + " pulses "
        )
    top_periods = mm_periods["period"].to_numpy()
    for tp in top_periods:
        all_top_periods.append(tp)

all_top_periods = np.unique(all_top_periods)
print("Top period candidates:")
print(all_top_periods)

At 50% tolerance, best period matches 86/130 pulses 
Top period candidates:
[ 0.0712  0.0824  0.0969  0.0977  0.0996  0.14    0.59    1.44    2.32
  2.71    3.77    5.11    5.39    5.4     6.75    6.76    7.07    7.08
  8.48   11.3    16.8    27.1    33.6    33.7    33.8    33.9    41.3
 41.4    49.3    49.4    49.5    49.6    49.7    49.8    49.9    50.
 67.6    67.7    67.8   ]


<IPython.core.display.Javascript object>

Now, we pick a random selection of half of the entire sample of bursts and fit periods at tolerance 50%. We do this 'ntrials' times and compare the resulting best matched periods to those found from the search of the whole sample.

In [6]:
ntrials = 100
all_per_matches = []
all_rand_periods = []

# Get period candidates with random trials
for i in range(0, ntrials):
    rng = np.random.default_rng(i)
    rand_bursts = rng.choice(
        all_pulses, int(len(all_pulses) / 2)
    )  # randomly pick half of bursts from entire sample
    most_matches, mm_periods, dataframe = get_pcands(
        rand_bursts, search_periods, min_diff, max_diff, 50.0, True
    )
    top_periods = mm_periods[
        "period"
    ].to_numpy()  # periods with max number of matches at this tolerance
    for tp in top_periods:
        all_rand_periods.append(tp)


all_rand_periods = np.unique(
    all_rand_periods
)  # omit duplicates from between tolerances


# Compare to best periods from full sample
for period in all_rand_periods:
    oneperc = period * 0.01
    for cand in all_top_periods:
        diff = abs(period - cand)
        if (
            diff < oneperc
        ):  # if difference between period candidates is within 1% of period
            print(
                "Random candidate "
                + str(np.round(period, 4))
                + " s is close to candidate "
                + str(np.round(cand, 4))
                + " s"
            )

Random candidate 0.0816 s is close to candidate 0.0824 s
Random candidate 0.0824 s is close to candidate 0.0824 s
Random candidate 0.0976 s is close to candidate 0.0969 s
Random candidate 0.0976 s is close to candidate 0.0977 s
Random candidate 0.0977 s is close to candidate 0.0969 s
Random candidate 0.0977 s is close to candidate 0.0977 s
Random candidate 0.0986 s is close to candidate 0.0977 s
Random candidate 0.099 s is close to candidate 0.0996 s
Random candidate 0.0992 s is close to candidate 0.0996 s
Random candidate 0.0996 s is close to candidate 0.0996 s
Random candidate 1.44 s is close to candidate 1.44 s
Random candidate 2.34 s is close to candidate 2.32 s
Random candidate 3.75 s is close to candidate 3.77 s
Random candidate 5.09 s is close to candidate 5.11 s
Random candidate 5.14 s is close to candidate 5.11 s
Random candidate 5.16 s is close to candidate 5.11 s
Random candidate 5.4 s is close to candidate 5.39 s
Random candidate 5.4 s is close to candidate 5.4 s
Random can

<IPython.core.display.Javascript object>

Next, we compare the FFA candidates from each observation to the top period candidates for the full search.

In [7]:
with open("period/ffa/57644_ffa_cands.txt", "r") as f1:
    for line in f1:
        split_line = line.split(",")
        period = float(split_line[0])
        oneperc = period * 0.01  # 1% of FFA cand period
        for cand in all_top_periods:
            diff = abs(period - cand)
            if diff < oneperc:
                # if difference between period candidates is within 1% of period
                print(
                    "57644 FFA candidate "
                    + str(np.round(period, 4))
                    + " s is close to candidate "
                    + str(np.round(cand, 4))
                    + " s"
                )

with open("period/ffa/57645_ffa_cands.txt", "r") as f2:
    for line in f2:
        split_line = line.split(",")
        period = float(split_line[0])
        oneperc = period * 0.01  # 1% of FFA cand period
        for cand in all_top_periods:
            diff = abs(period - cand)
            if diff < oneperc:
                # if difference between period candidates is within 1% of period
                print(
                    "57645 FFA candidate "
                    + str(np.round(period, 4))
                    + " s is close to candidate "
                    + str(np.round(cand, 4))
                    + " s"
                )

57645 FFA candidate 5.3777 s is close to candidate 5.39 s
57645 FFA candidate 5.3777 s is close to candidate 5.4 s
57645 FFA candidate 2.6889 s is close to candidate 2.71 s
57645 FFA candidate 2.3402 s is close to candidate 2.32 s
57645 FFA candidate 1.4401 s is close to candidate 1.44 s
57645 FFA candidate 33.277 s is close to candidate 33.6 s
57645 FFA candidate 33.8222 s is close to candidate 33.6 s
57645 FFA candidate 33.8222 s is close to candidate 33.7 s
57645 FFA candidate 33.8222 s is close to candidate 33.8 s
57645 FFA candidate 33.8222 s is close to candidate 33.9 s


<IPython.core.display.Javascript object>

To consider each observation separately, we split the burst times into separate arrays for each observation. This won't work right if there's a single observation which spans multiple MJDs.

In [8]:
# Get the number of individual MJDs
days = []
for pt in mjd:
    time = str(pt)
    split_time = time.split(".")
    day = split_time[0]
    days.append(day)

days = np.unique(days)
n_mjds = len(days)

# Create array with as many elements as there are MJDs
sep_pulse_times = []
# Each element will be its own array with pulse times from only one MJD
i = 0
for day in days:
    day_pulse_times = []  # pulse times just for this day

    for pt in mjd:
        time = str(pt)
        split_time = time.split(".")
        pt_day = split_time[0]
        if pt_day == day:
            day_pulse_times.append(pt)

    sep_pulse_times.append(day_pulse_times)
    i += 1

<IPython.core.display.Javascript object>

The "sep_pulse_times" variable is an array where each element is an array containing the pulse times from one single MJD. <br>
Below, we search each observation individually and separate out the best candidate periods for each.

In [9]:
best_obs_periods = []
# Each element in this array is an array with the best period candidates from one observation

for i in range(0, len(sep_pulse_times)):
    pt_split = str(sep_pulse_times[i][0]).split(".")
    mjd = pt_split[0]
    print("Observation " + str(i) + ": MJD " + str(mjd))

    these_pulse_times = sep_pulse_times[i]
    all_obs_periods = []
    # Array that will hold all best period candidates

    # Get top period candidates for each observation
    for tol in tols:
        most_matches, mm_periods, dataframe = get_pcands(
            these_pulse_times, search_periods, min_diff, max_diff, tol, True
        )
        top_periods = mm_periods[
            "period"
        ].to_numpy()  # periods with max number of matches at this tolerance
        for tp in top_periods:
            all_obs_periods.append(tp)
        if tol == 50.0:
            print(
                "At 50% tolerance, best period matches "
                + str(int(most_matches))
                + "/"
                + str(len(these_pulse_times))
                + " pulses "
            )
    # After iterating through all tolerances and getting the best periods at each tolerance:
    all_obs_periods = np.unique(all_obs_periods)
    best_obs_periods.append(all_obs_periods)

Observation 0: MJD 57644
At 50% tolerance, best period matches 42/54 pulses 
Observation 1: MJD 57645
At 50% tolerance, best period matches 55/76 pulses 


<IPython.core.display.Javascript object>

Finally, we search for candidates that match between the two observations.

In [10]:
all_best_periods = []

for i in range(0, len(best_obs_periods)):
    for j in range(0, len(best_obs_periods)):
        if (
            j - i < 0
        ):  # This searches each pair once and does not check a pair with itself
            set_a = best_obs_periods[i]
            set_b = best_obs_periods[j]

            for period in set_a:
                oneperc = 0.01 * period
                for cand in set_b:
                    diff = abs(period - cand)
                    if diff < oneperc:
                        # if difference between period candidates is within 1% of period
                        print(
                            "Match between "
                            + str(period)
                            + " s and "
                            + str(cand)
                            + " s"
                        )

<IPython.core.display.Javascript object>