In [1]:
#Import functionalities
import numpy as np
from numpy.random import default_rng
import pandas as pd
import matplotlib.pyplot as plt

%load_ext nb_black

Below, the file containing the candidate properties is read in and the burst times are sorted into an array. This is specified for our file structure and burst fitting procedure, and will likely need to be changed.

In [2]:
#Read in file and only look at bursts fit with MCMC
data = pd.read_csv('../data/all_bursts_bary.csv')
data = data[(data['fit_method']=='mcmc')] #excise curvefit bursts
print(str(len(data)) + ' good MCMC bursts')

#Get array of all pulse times
mjd = data['$\mu_t$ (ms)']
mjd = mjd.sort_values()

130 good MCMC bursts


Period search function <br>
Takes in differences between pulse times (in seconds) and tolerance from 0 to 1 <br>
Chooses search periods by dividing the minimum difference by integers until 1 ms is reached (or it's divided by 256) <br>
Returns the maximum number of diffs that any period matched; a list of those periods; and a dataframe of all of the candidate periods sorted by the number of diffs they matched

In [3]:
def p_search(diffs, tolerance):
#Assumes differences (in seconds) are given
#Assumes tolerance is given between 0 and 1, e.g. 0.01 = 1% tolerance 
    most_matches=[]
    best_by_matches=[]
    this_mindiff = np.amin(diffs)
    phase_tol = tolerance
    
    search_periods=[]

    i=1
    sp = this_mindiff
    while sp>=(0.001) and i<256:
      search_periods.append(sp)
      sp = this_mindiff/i
      i+=1
        
    #print('Searching ' + str(len(search_periods)) + ' period(s)')
    #print(search_periods)
    matches = np.zeros(len(search_periods))
    
    #For each search period, find how many diffs match it to within this tolerance
    for i in range(0,len(search_periods)):
    #for each difference between pulse times
      for j in range(0,len(diffs)): 
        #This is the same as in getper.py
        phase = (diffs[j]/search_periods[i]) - int(diffs[j]/search_periods[i]) 
        if phase<(0.5*phase_tol) or phase>(1.0-0.5*phase_tol): 
          matches[i]+=1  
        
    search_periods = pd.Series(search_periods)
    matches = pd.Series(matches)
    datadict = {'period': search_periods, 'N matches': matches}
    dataframe = pd.concat(datadict, axis=1)
    
    #find the periods that matched the most pulses
    best_by_matches = dataframe.sort_values(by='N matches',ascending=False) 
    most_matches = best_by_matches.iloc[0,1] #max number of matches of any candidate period
    mm_periods = best_by_matches[(best_by_matches['N matches']==most_matches)] #periods with this N matches
    return most_matches, mm_periods, best_by_matches

Random timeseries search function <br>
Takes in the MJD of the observation (specified for our observations), the tolerance from 0 to 1, the number of trials to perform over, the minimum difference to consider between pulses, and a flag to run this on 3/4 of the number of pulses <br>
Randomly places the number of pulses in some length timeseries, and runs p_search on that list of differences (greater than the specified mindiff) <br>
Returns a dataframe of the number of differences, and for how many trials that was the maximum number of differences matched by any period.

In [4]:
def random_ts_search(mjd,tolerance,ntrials,mindiff,tq_flag=None):
    #Observational parameters- length, npulses
    if mjd==57644:
        npoints = 72840576 #Number of points in time series
        npulses = 54 #Total number of pulses detected, to randomly place in this time series
    elif mjd==57645:
        npoints = 67692032
        npulses = 75
    tsamp=81.92e-6
    
    #The "tq_flag" variable is True if you're running this code on 3/4 of the random data, 
    #and changes "npulses" to 3/4 of npulses 
    if tq_flag==True:
      npulses= int((3*npulses)/4)
        
    all_max_matches=[] #Array which holds number of maximum matches for each random trial
    
    for i in range(0,ntrials):
      rand_pts = [] #times of random pulses, in seconds, from 0 to the observation length
      all_diffs = [] #array holding differences between pulse times, in seconds
        
      #Randomly place n pulses in the time series
      rng = default_rng()
      r=rng.random(npulses)
      for j in range(0,len(r)):
        pt = r[j]*npoints*tsamp
        rand_pts.append(pt)
      
      rand_pts=np.sort(rand_pts)
      #print(rand_pts)  
      #Calculate diffs and remove diffs shorter than mindiff
      for k in range(1,len(rand_pts)):
        diff = (rand_pts[k]-rand_pts[k-1])
        if diff > mindiff:
          all_diffs.append(diff) 
 
      #Run psearch: returns most matches, periods at that match, and sorted dataframe
      most_matches, mm_periods, best_by_matches = p_search(all_diffs,tolerance)
      all_max_matches.append(most_matches)
    
    ###DONE RUNNING THROUGH ALL TRIALS###
    #Make dataframe of max number of matches, and number of trials that match that number of diffs
    match, counts = np.unique(all_max_matches, return_counts=True)
    d = {'ndiffs': match, 'counts': counts}
    df = pd.DataFrame(data=d)
    df= df.sort_values(by='ndiffs',ascending=False)
    return df

Random subset search function <br>
Intakes the list of real pulse times, the tolerance (between 0 and 1), the number of trials to run this over, and the minimum difference to consider a real pulse <br>
Randomly chooses 3/4 of the pulses and runs p_search on them, returns list of best periods for each trial

In [5]:
def random_subset_search(pulse_times, tolerance, ntrials, mindiff,mjd_flag=None):
  all_max_matches=[]
  all_rand_bps=[]
  all_bp_matches=[]
 
  #randomly select 3/4 of pulses from observation
  if (len(pulse_times)+1)%2==0:
      npulses=3*(len(pulse_times)+1)/4
  else:
      npulses=3*(len(pulse_times)+2)/4   
    
  for i in range(0,ntrials):
    #Randomly select npulses pulses from the list of pulse times
    pulse_times = np.random.choice(pulse_times,size=int(npulses),replace=False)  

    #Get differences and eliminate diffs below some threshold
    all_diffs=[]

    for i in range(1,len(pulse_times)):
      if mjd_flag==True:
        diff = (pulse_times[i]-pulse_times[i-1])*24.*3600. #convert difference from MJD to seconds
      else:
        diff = (pulse_times[i]-pulse_times[i-1])
      if diff > mindiff:
        all_diffs.append(diff)

    #run getper on this series of pulse times
    most_matches, mm_periods, best_by_matches = p_search(all_diffs,tolerance)
    all_max_matches.append(most_matches)
    best_periods = mm_periods['period'].to_numpy()
    
    for i in range(0,len(mm_periods)):
        candper=mm_periods.iloc[i,0]
        all_rand_bps.append(candper)
        cp_matches=mm_periods.iloc[i,1]
        all_bp_matches.append(cp_matches)
   
  #Returns dataframe with maximum matches (matches) and number of trials that matched that many diffs (counts)
  sorted_max_matches = np.sort(all_max_matches)
  match, counts = np.unique(sorted_max_matches, return_counts=True)

  dict = {'matches': match, 'counts': counts}
  df = pd.DataFrame(data=dict)
  df = df.sort_values(by='matches',ascending=False)
  return df, all_rand_bps, all_bp_matches

First, MJD 57644-- get differences (in seconds) longer than some specified minimum

In [6]:
#Separate into only bursts from this MJD
burst_times = mjd[(57645.00-mjd)>0] #only get TOAs from MJD 57644
burst_times = burst_times.to_numpy()
print('Starting with ' + str(len(burst_times)-1) + ' diffs')

#Get differences and eliminate diffs below some threshold
mindiff = 0.05 #in seconds
all_diffs=[]

for i in range(1,len(burst_times)):
    diff = (burst_times[i]-burst_times[i-1])*24.*3600. #convert difference from MJD to seconds
    if diff > mindiff:
      all_diffs.append(diff)
   
mindiff_all=np.round(np.amin(all_diffs),4)
print(str(len(all_diffs)) + ' diffs above ' + str(mindiff) + ' seconds')
print('New mindiff ' + str(mindiff_all) + ' seconds')

Starting with 53 diffs
48 diffs above 0.05 seconds
New mindiff 0.0731 seconds


Periodicity search and random search on all pulses from 57644

In [7]:
tols=np.arange(0.01,0.51,0.01)
ntrials_subset=100
ntrials=1000

In [8]:
npulses=len(all_diffs)+1 #number of pulses to randomly distribute
all_best_periods=[] #array which holds periods found by period search on whole data at each tolerance

for tol in tols:
  best_by_matches=[]
  most_matches, mm_periods, best_by_matches = p_search(all_diffs,tol)
  best_periods = mm_periods['period'].to_numpy() #periods which match max # diffs to real data
  tol_pct = int(np.round(tol*100,0))
  print('Tolerance: ' + str(tol_pct) + '%')
  print('Best periods match ' + str(most_matches) + '/' + str(len(all_diffs)) + ' diffs')

  for per in best_periods:
    all_best_periods.append(per)
    
  #Search over random timeseries ntrials times  
  random_df = random_ts_search(57644,tol,ntrials,mindiff,False)
  gt_matches = random_df[(random_df['ndiffs']>=most_matches)]
  
  counts=0
  for i in range(0,len(gt_matches)):
    counts += gt_matches.iloc[i,1]
  print(str(counts) + '/' + str(ntrials) + ' random trials match at least as many diffs as this')
  if counts <= (ntrials)*0.25: #if real data does better than 75% of random trials
    print(mm_periods) #print best periods at this tolerance

  ###Done iterating over all tolerances###    

Tolerance: 1%
Best periods match 5.0/48 diffs
309/1000 random trials match at least as many diffs as this
Tolerance: 2%
Best periods match 5.0/48 diffs
983/1000 random trials match at least as many diffs as this
Tolerance: 3%
Best periods match 5.0/48 diffs
1000/1000 random trials match at least as many diffs as this
Tolerance: 4%
Best periods match 6.0/48 diffs
999/1000 random trials match at least as many diffs as this
Tolerance: 5%
Best periods match 8.0/48 diffs
946/1000 random trials match at least as many diffs as this
Tolerance: 6%
Best periods match 10.0/48 diffs
567/1000 random trials match at least as many diffs as this
Tolerance: 7%
Best periods match 10.0/48 diffs
865/1000 random trials match at least as many diffs as this
Tolerance: 8%
Best periods match 10.0/48 diffs
991/1000 random trials match at least as many diffs as this
Tolerance: 9%
Best periods match 11.0/48 diffs
975/1000 random trials match at least as many diffs as this
Tolerance: 10%
Best periods match 11.0/48

Same process, for 3/4 of the pulses from 57644

In [9]:
all_tq_periods=[] #Best periods from subset search

for tol in tols: 
  tol_pct = int(np.round(tol*100,0))
  print('Tolerance: ' + str(tol_pct) + '%')
    
  #Search over randomly chosen subset of 3/4 of real data, ntrials_subset times 
  random_subset_df, all_rand_bps, all_bp_matches = random_subset_search(burst_times, tol, ntrials_subset, mindiff,True)
  most_matches = random_subset_df.iloc[0,0]
  ncounts = random_subset_df.iloc[0,1]
  print('Best real periods over all trials matched ' + str(most_matches) + ' diffs, which occurred in ' + str(ncounts) + '/' + str(ntrials_subset) + ' trials')
  
  #Search over same-length timeseries with 3/4 of randomly distributed pulses, ntrials times 
  random_df = random_ts_search(57644,tol,ntrials,mindiff,True)
  gt_matches = random_df[(random_df['ndiffs']>=most_matches)]
  
  counts=0
  for i in range(0,len(gt_matches)):
    counts += gt_matches.iloc[i,1]
  print(str(counts) + '/' + str(ntrials) + ' random trials match at least as many diffs as this')
  if counts <= (ntrials)*0.25: #if real data does better than 75% of random trials
    #Get the indices of the array of max matches where the most number of matches are found
    index = (np.where(all_bp_matches>=most_matches))[0] 
    #print best periods at this tolerance
    print('Best periods at this tolerance: (s)')
    for k in index:
      print(all_rand_bps[k]) 
      all_tq_periods.append(all_rand_bps[k])
  

all_tq_periods=np.unique(all_tq_periods)
all_tq_periods=np.sort(all_tq_periods)
print(all_tq_periods)
#all_rand_bps=np.unique(all_rand_bps)

#for per in all_best_periods:
    #for randper in all_rand_bps:
        #diff = abs(per-randper)
        #if diff<(0.005*per): #if real period and random period are close together
          #print(str(np.round(per,5)) + ' is close to rand period ' + str(np.round(randper,5)))

Tolerance: 1%
Best real periods over all trials matched 5.0 diffs, which occurred in 1/100 trials
117/1000 random trials match at least as many diffs as this
Best periods at this tolerance: (s)
0.897116188515121
Tolerance: 2%
Best real periods over all trials matched 5.0 diffs, which occurred in 22/100 trials
756/1000 random trials match at least as many diffs as this
Tolerance: 3%
Best real periods over all trials matched 7.0 diffs, which occurred in 2/100 trials
194/1000 random trials match at least as many diffs as this
Best periods at this tolerance: (s)
0.7490272136405111
4.078527234284388
Tolerance: 4%
Best real periods over all trials matched 7.0 diffs, which occurred in 2/100 trials
541/1000 random trials match at least as many diffs as this
Tolerance: 5%
Best real periods over all trials matched 8.0 diffs, which occurred in 3/100 trials
439/1000 random trials match at least as many diffs as this
Tolerance: 6%
Best real periods over all trials matched 8.0 diffs, which occurred 

Next, MJD 57645-- get differences (in seconds) longer than some specified minimum

In [10]:
#Separate into only bursts from this MJD
burst_times = mjd[(57645.00-mjd)<0] #only get TOAs from MJD 57645
burst_times = burst_times.to_numpy()
print('Starting with ' + str(len(burst_times)-1) + ' diffs')

#Get differences and eliminate diffs below some threshold
all_diffs=[]

for i in range(1,len(burst_times)):
    diff = (burst_times[i]-burst_times[i-1])*24.*3600. #convert difference from MJD to seconds
    if diff > mindiff:
      all_diffs.append(diff)
    
mindiff_all=np.round(np.amin(all_diffs),4)
print(str(len(all_diffs)) + ' diffs above ' + str(mindiff) + ' seconds')
print('New mindiff ' + str(mindiff_all) + ' seconds')

Starting with 75 diffs
69 diffs above 0.05 seconds
New mindiff 0.0993 seconds


Periodicity search and random search of all pulses from 57645

In [11]:
npulses=len(all_diffs)+1 #number of pulses to randomly distribute
all_best_periods=[] #array which holds periods found by period search on whole data at each tolerance

for tol in tols:
  best_by_matches=[]
  most_matches, mm_periods, best_by_matches = p_search(all_diffs,tol)
  best_periods = mm_periods['period'].to_numpy() #periods which match max # diffs to real data
  tol_pct = int(np.round(tol*100,0))
  print('Tolerance: ' + str(tol_pct) + '%')
  print('Best periods match ' + str(most_matches) + '/' + str(len(all_diffs)) + ' diffs')

  for per in best_periods:
    all_best_periods.append(per)
    
  #Search over random timeseries ntrials times  
  random_df = random_ts_search(57645,tol,ntrials,mindiff,False)
  gt_matches = random_df[(random_df['ndiffs']>=most_matches)]
  
  counts=0
  for i in range(0,len(gt_matches)):
    counts += gt_matches.iloc[i,1]
  print(str(counts) + '/' + str(ntrials) + ' random trials match at least as many diffs as this')
  if counts <= (ntrials)*0.25: #if real data does better than 75% of random trials
    print(mm_periods) #print best periods at this tolerance

  ###Done iterating over all tolerances###    

Tolerance: 1%
Best periods match 4.0/69 diffs
994/1000 random trials match at least as many diffs as this
Tolerance: 2%
Best periods match 5.0/69 diffs
998/1000 random trials match at least as many diffs as this
Tolerance: 3%
Best periods match 7.0/69 diffs
970/1000 random trials match at least as many diffs as this
Tolerance: 4%
Best periods match 8.0/69 diffs
980/1000 random trials match at least as many diffs as this
Tolerance: 5%
Best periods match 10.0/69 diffs
869/1000 random trials match at least as many diffs as this
Tolerance: 6%
Best periods match 11.0/69 diffs
886/1000 random trials match at least as many diffs as this
Tolerance: 7%
Best periods match 12.0/69 diffs
911/1000 random trials match at least as many diffs as this
Tolerance: 8%
Best periods match 13.0/69 diffs
922/1000 random trials match at least as many diffs as this
Tolerance: 9%
Best periods match 15.0/69 diffs
672/1000 random trials match at least as many diffs as this
Tolerance: 10%
Best periods match 15.0/69

Same process, but for 3/4 of the pulses from 57645

In [12]:
all_tq_periods=[] #Best periods from subset search
for tol in tols: 
  tol_pct = int(np.round(tol*100,0))
  print('Tolerance: ' + str(tol_pct) + '%')
    
  #Search over randomly chosen subset of 3/4 of real data, ntrials_subset times 
  random_subset_df, all_rand_bps, all_bp_matches = random_subset_search(burst_times, tol, ntrials_subset, mindiff,True)
  most_matches = random_subset_df.iloc[0,0]
  ncounts = random_subset_df.iloc[0,1]
  print('Best real periods over all trials matched ' + str(most_matches) + ' diffs, which occurred in ' + str(ncounts) + '/' + str(ntrials_subset) + ' trials')
  
  #Search over same-length timeseries with 3/4 of randomly distributed pulses, ntrials times 
  random_df = random_ts_search(57644,tol,ntrials,mindiff,True)
  gt_matches = random_df[(random_df['ndiffs']>=most_matches)]
  
  counts=0
  for i in range(0,len(gt_matches)):
    counts += gt_matches.iloc[i,1]
  print(str(counts) + '/' + str(ntrials) + ' random trials match at least as many diffs as this')
  if counts <= (ntrials)*0.25: #if real data does better than 75% of random trials
    #Get the indices of the array of max matches where the most number of matches are found
    index = (np.where(all_bp_matches>=most_matches))[0] 
    #print best periods at this tolerance
    print('Best periods at this tolerance: (s)')
    for k in index:
      print(all_rand_bps[k]) 
      all_tq_periods.append(all_rand_bps[k])
  

all_tq_periods=np.unique(all_tq_periods)
all_tq_periods=np.sort(all_tq_periods)
print(all_tq_periods)  


#all_rand_bps=np.unique(all_rand_bps)

#for per in all_best_periods:
    #for randper in all_rand_bps:
        #diff = abs(per-randper)
        #if diff<(0.005*per): #if real period and random period are close together
          #print(str(np.round(per,5)) + ' is close to rand period ' + str(np.round(randper,5)))

Tolerance: 1%
Best real periods over all trials matched 4.0 diffs, which occurred in 36/100 trials
737/1000 random trials match at least as many diffs as this
Tolerance: 2%
Best real periods over all trials matched 6.0 diffs, which occurred in 4/100 trials
191/1000 random trials match at least as many diffs as this
Best periods at this tolerance: (s)
2.803371535680656
0.6783996767274314
8.222763849023197
0.40077969980692224
Tolerance: 3%
Best real periods over all trials matched 9.0 diffs, which occurred in 1/100 trials
3/1000 random trials match at least as many diffs as this
Best periods at this tolerance: (s)
0.6588376229526577
Tolerance: 4%
Best real periods over all trials matched 9.0 diffs, which occurred in 1/100 trials
26/1000 random trials match at least as many diffs as this
Best periods at this tolerance: (s)
0.09676546730399835
Tolerance: 5%
Best real periods over all trials matched 10.0 diffs, which occurred in 1/100 trials
11/1000 random trials match at least as many diff

997/1000 random trials match at least as many diffs as this
Tolerance: 50%
Best real periods over all trials matched 27.0 diffs, which occurred in 2/100 trials
985/1000 random trials match at least as many diffs as this
[0.09676547 0.39889642 0.4007797  0.58626791 0.65883762 0.67839968
 0.77910147 1.0910534  2.80337154 5.13403584 8.22276385]
