Merging more of Scott's and committing a beta version of GBNCC_search.py.

commit 19cc148ddf5c9dd8c420a95f41381f5d07653df5 (1 parent: 22e785c). Kevin Stovall committed Sep 8, 2011.
@@ -2,17 +2,16 @@
import glob, os, os.path, shutil, socket, struct, tarfile, stat
import numpy, sys, presto, time, sigproc, sifting
import psr_utils as pu
+import pyfits
-institution = "NRAOCV"
+institution = "UTB"
base_tmp_dir = "/dev/shm/"
-base_output_dir = "/home/sransom/results/GBT/GBNCC"
+base_output_dir = "/home/kstovall/data/GBNCC/results/"
#-------------------------------------------------------------------
# Tunable parameters for searching and folding
# (you probably don't need to tune any of them)
-orig_N = 1440000 # Number of samples to analyze at a time (~118 sec)
-raw_N = 1900000 # Number of samples to step through .fits files
-overlap_factor = 0.5 # Overlap each orig_N samples by this fraction
+raw_N = 1440000 # Number of samples to analyze (~118 secs)
rfifind_chunk_time = 25600 * 0.00008192 # ~2.1 sec
singlepulse_threshold = 5.0 # threshold SNR for candidate determination
singlepulse_plot_SNR = 5.5 # threshold SNR for singlepulse plot
@@ -38,6 +37,7 @@
sifting.short_period = 0.0005 # Shortest period candidates to consider (s)
sifting.long_period = 15.0 # Longest period candidates to consider (s)
sifting.harm_pow_cutoff = 8.0 # Power required in at least one harmonic
+foldnsubs = 128 # Number of subbands to use when folding
#-------------------------------------------------------------------
def get_baryv(ra, dec, mjd, T, obs="GB"):
@@ -52,6 +52,7 @@ def get_baryv(ra, dec, mjd, T, obs="GB"):
nn = len(tts)
bts = numpy.zeros(nn, dtype=numpy.float64)
vel = numpy.zeros(nn, dtype=numpy.float64)
+
presto.barycenter(tts, bts, vel, nn, ra, dec, obs, "DE200")
return vel.mean()
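For reference, get_baryv returns the mean barycentric velocity (v/c) over the integration, averaged via presto.barycenter; a minimal usage sketch, with the coordinates, MJD, and duration invented for illustration:

    # Hypothetical inputs; the string formats match the RA/DEC values read from the header
    ra, dec = "01:23:45.67", "+61:23:45.6"
    baryv = get_baryv(ra, dec, 55812.0, 120.0, obs="GB")
    print "mean barycentric v/c = %g" % baryv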
@@ -108,9 +109,9 @@ def get_folding_command(cand, obs, ddplans):
downsamp = dfact
break
if downsamp==1:
- filfile = obs.fil_filenm
+ fitsfile = obs.fits_filenm
else:
- filfile = obs.basefilenm+"_DS%d.fil"%downsamp
+ fitsfile = obs.dsbasefilenm+"_DS%d%s"%(downsamp,obs.fits_filenm[obs.fits_filenm.rfind("_"):])
p = 1.0 / cand.f
if (p < 0.002):
Mp, Mdm, N = 2, 2, 24
@@ -124,36 +125,36 @@ def get_folding_command(cand, obs, ddplans):
else:
Mp, Mdm, N = 1, 1, 200
otheropts = "-npart 30 -nopdsearch -pstep 1 -pdstep 2 -dmstep 1"
- return "prepfold -noxwin -accelcand %d -accelfile %s.cand -dm %.2f -o %s %s -n %d -npfact %d -ndmfact %d %s" % \
+ return "prepfold -noxwin -accelcand %d -accelfile %s.cand -dm %.2f -o %s %s -n %d -npfact %d -ndmfact %d -nsub %d %s" % \
(cand.candnum, cand.filename, cand.DM, outfilenm,
- otheropts, N, Mp, Mdm, filfile)
+ otheropts, N, Mp, Mdm, foldnsubs, fitsfile)
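The downsampled file name above is rebuilt from the base name, a _DS<factor> tag, and the trailing token of the original name; a quick sketch of the same string surgery, with a hypothetical file name:

    # Hypothetical filename, illustrating the fitsfile construction in get_folding_command
    fits_filenm = "guppi_55812_GBNCC0123_0001.fits"
    dsbasefilenm = fits_filenm[:fits_filenm.rfind("_")]   # "guppi_55812_GBNCC0123"
    downsamp = 4
    print dsbasefilenm + "_DS%d%s" % (downsamp, fits_filenm[fits_filenm.rfind("_"):])
    # -> guppi_55812_GBNCC0123_DS4_0001.fits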
class obs_info:
"""
- class obs_info(fil_filenm)
+ class obs_info(fits_filenm)
A class describing the observation and the analysis.
"""
- def __init__(self, fil_filenm):
- self.fil_filenm = fil_filenm
- self.basefilenm = fil_filenm[:fil_filenm.find(".fil")]
- filhdr, hdrlen = sigproc.read_header(fil_filenm)
- self.MJD = filhdr['tstart']
- self.nchans = filhdr['nchans']
- self.ra_rad = sigproc.ra2radians(filhdr['src_raj'])
- self.ra_string = pu.coord_to_string(*pu.rad_to_hms(self.ra_rad))
- self.dec_rad = sigproc.dec2radians(filhdr['src_dej'])
- self.dec_string = pu.coord_to_string(*pu.rad_to_dms(self.dec_rad))
+ def __init__(self, fits_filenm):
+ self.fits_filenm = fits_filenm
+ self.basefilenm = fits_filenm[:fits_filenm.find(".fits")]
+ self.dsbasefilenm = fits_filenm[:fits_filenm.rfind("_")]
+ fitshandle=pyfits.open(fits_filenm)
+ self.MJD = fitshandle[0].header['STT_IMJD']+fitshandle[0].header['STT_SMJD']/86400.0+fitshandle[0].header['STT_OFFS']/86400.0
+ self.nchans = fitshandle[0].header['OBSNCHAN']
+ self.ra_string = fitshandle[0].header['RA']
+ self.dec_string = fitshandle[0].header['DEC']
self.str_coords = "J"+"".join(self.ra_string.split(":")[:2])
- if self.dec_rad >= 0.0: self.str_coords += "+"
self.str_coords += "".join(self.dec_string.split(":")[:2])
- self.az = filhdr['az_start']
- self.el = 90.0-filhdr['za_start']
- fillen = os.stat(fil_filenm)[6]
- self.raw_N = (fillen-hdrlen)/(filhdr['nbits']/8)/filhdr['nchans']
- self.dt = filhdr['tsamp']
+ self.nbits=fitshandle[0].header['BITPIX']
+
+ self.raw_N=fitshandle[1].header['NAXIS2']*fitshandle[1].header['NSBLK']
+ self.dt=fitshandle[1].header['TBIN']*1000000
self.raw_T = self.raw_N * self.dt
- self.N = orig_N
+ self.N = raw_N
+ if self.dt == 163.84:
+ self.N=self.N/2
self.T = self.N * self.dt
+ self.srcname=fitshandle[0].header['SRC_NAME']
# Determine the average barycentric velocity of the observation
self.baryv = get_baryv(self.ra_string, self.dec_string,
self.MJD, self.T, obs="GB")
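The start MJD is now assembled from three PSRFITS primary-header keywords (integer day, integer seconds, fractional-second offset); a standalone sketch of the same computation, file name hypothetical:

    import pyfits
    hdr = pyfits.open("guppi_55812_GBNCC0123_0001.fits")[0].header   # hypothetical file
    # STT_IMJD is whole days; STT_SMJD and STT_OFFS are seconds, converted to days
    mjd = hdr['STT_IMJD'] + hdr['STT_SMJD']/86400.0 + hdr['STT_OFFS']/86400.0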
@@ -162,7 +163,7 @@ def __init__(self, fil_filenm):
# according to base/MJD/filenmbase/beam
self.outputdir = os.path.join(base_output_dir,
str(int(self.MJD)),
- self.str_coords)
+ self.srcname)
# Figure out which host we are processing on
self.hostname = socket.gethostname()
# The fraction of the data recommended to be masked by rfifind
@@ -186,7 +187,7 @@ def __init__(self, fil_filenm):
def write_report(self, filenm):
report_file = open(filenm, "w")
report_file.write("---------------------------------------------------------\n")
- report_file.write("%s was processed on %s\n"%(self.fil_filenm, self.hostname))
+ report_file.write("%s was processed on %s\n"%(self.fits_filenm, self.hostname))
report_file.write("Ending UTC time: %s\n"%(time.asctime(time.gmtime())))
report_file.write("Total wall time: %.1f s (%.2f hrs)\n"%\
(self.total_time, self.total_time/3600.0))
@@ -234,18 +235,57 @@ def __init__(self, lodm, dmstep, dmsperpass, numpasses, numsub, downsamp):
numpy.arange(self.dmsperpass)*self.dmstep + lodm]
self.dmlist.append(dmlist)
-def main(fil_filenm, workdir, ddplans):
+def remove_crosslist_duplicate_candidates(candlist1,candlist2):
+ n1 = len(candlist1)
+ n2 = len(candlist2)
+ removelist1 = []
+ removelist2 = []
+ candlist2.sort(sifting.cmp_freq)
+ candlist1.sort(sifting.cmp_freq)
+ print " Searching for crosslist dupes..."
+ ii = 0
+ while ii < n1:
+ jj=0
+ while jj < n2:
+ if numpy.fabs(candlist1[ii].r-candlist2[jj].r) < sifting.r_err:
+ if sifting.cmp_sigma(candlist1[ii],candlist2[jj])<0:
+ print "Crosslist remove from candlist 2, %f > %f, %d:%f~%f" % (candlist1[ii].sigma,candlist2[jj].sigma,jj,candlist1[ii].r,candlist2[jj].r)
+ if jj not in removelist2:
+ removelist2.append(jj)
+ else:
+ print "Crosslist remove from candlist 1, %f > %f, %d:%f~%f" % (candlist2[jj].sigma,candlist1[ii].sigma,ii,candlist1[ii].r,candlist2[jj].r)
+ if ii not in removelist1:
+ removelist1.append(ii)
+ jj += 1
+ ii += 1
+ for ii in range(len(removelist2)-1,-1,-1):
+ print "Removing %d from candlist2" % removelist2[ii]
+ del(candlist2[removelist2[ii]])
+ for ii in range(len(removelist1)-1,-1,-1):
+ print "Removing %d from candlist1" % removelist1[ii]
+ del(candlist1[removelist1[ii]])
+ print "Removed %d crosslist candidates\n" % (len(removelist1)+len(removelist2))
+ print "Found %d candidates. Sorting them by significance...\n" % (len(candlist1)+len(candlist2))
+ candlist1.sort(sifting.cmp_sigma)
+ candlist2.sort(sifting.cmp_sigma)
+ return candlist1,candlist2
+
+
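The new helper keeps, for every pair of candidates whose Fourier bins agree to within sifting.r_err, only the higher-significance one. The same rule on bare tuples, for illustration (toy numbers, not sifting objects):

    # Toy stand-ins for candidates: (Fourier bin r, sigma)
    r_err = 1.1
    c1, c2 = (1000.4, 8.0), (1000.9, 12.5)
    if abs(c1[0] - c2[0]) < r_err:
        keep = c1 if c1[1] >= c2[1] else c2   # the higher-sigma candidate survives
        print "kept candidate with sigma %.1f" % keep[1]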
+def main(fits_filenm, workdir, ddplans):
# Change to the specified working directory
os.chdir(workdir)
# Get information on the observation and the job
- job = obs_info(fil_filenm)
+ job = obs_info(fits_filenm)
if job.raw_T < low_T_to_search:
print "The observation is too short (%.2f s) to search."%job.raw_T
sys.exit()
job.total_time = time.time()
- ddplans = ddplans[job.nchans]
+ if job.dt == 163.84:
+ ddplans = ddplans[str(job.nchans)+"slow"]
+ else:
+ ddplans = ddplans[str(job.nchans)+"fast"]
# Use whatever .zaplist is found in the current directory
default_zaplist = glob.glob("*.zaplist")[0]
@@ -262,14 +302,19 @@ def main(fil_filenm, workdir, ddplans):
os.makedirs(tmpdir)
except: pass
- print "\nBeginning GBNCC search of '%s'"%job.fil_filenm
+ print "\nBeginning GBNCC search of '%s'"%job.fits_filenm
print "UTC time is: %s"%(time.asctime(time.gmtime()))
- # rfifind the filterbank file
- cmd = "rfifind -time %.17g -o %s %s > %s_rfifind.out"%\
- (rfifind_chunk_time, job.basefilenm,
- job.fil_filenm, job.basefilenm)
- job.rfifind_time += timed_execute(cmd)
+ rfifindout=job.basefilenm+"_rfifind.out"
+ rfifindmask=job.basefilenm+"_rfifind.mask"
+
+ if not os.path.exists(rfifindout) or not os.path.exists(rfifindmask):
+
+ # rfifind the filterbank file
+ cmd = "rfifind -time %.17g -o %s %s > %s_rfifind.out"%\
+ (rfifind_chunk_time, job.basefilenm,
+ job.fits_filenm, job.basefilenm)
+ job.rfifind_time += timed_execute(cmd)
maskfilenm = job.basefilenm + "_rfifind.mask"
# Find the fraction that was suggested to be masked
# Note: Should we stop processing if the fraction is
@@ -278,26 +323,28 @@ def main(fil_filenm, workdir, ddplans):
# Iterate over the stages of the overall de-dispersion plan
dmstrs = []
+
for ddplan in ddplans:
# Make a downsampled filterbank file
if ddplan.downsamp > 1:
- cmd = "downsample_filterbank.py %d %s"%(ddplan.downsamp, job.fil_filenm)
+ cmd = "psrfits_subband -dstime %d -nsub %d -o %s_DS%d %s"%\
+ (ddplan.downsamp, job.nchans, job.dsbasefilenm, ddplan.downsamp, job.dsbasefilenm )
job.downsample_time += timed_execute(cmd)
- fil_filenm = job.fil_filenm[:job.fil_filenm.find(".fil")] + \
- "_DS%d.fil"%ddplan.downsamp
+ fits_filenm = job.dsbasefilenm + "_DS%d%s"%\
+ (ddplan.downsamp,job.fits_filenm[job.fits_filenm.rfind("_"):])
else:
- fil_filenm = job.fil_filenm
-
+ fits_filenm = job.fits_filenm
# Iterate over the individual passes through the .fil file
for passnum in range(ddplan.numpasses):
subbasenm = "%s_DM%s"%(job.basefilenm, ddplan.subdmlist[passnum])
# Now de-disperse
cmd = "prepsubband -mask %s -lodm %.2f -dmstep %.2f -nsub %d -numdms %d -numout %d -o %s/%s %s"%\
- (maskfilenm, ddplan.lodm+passnum*ddplan.sub_dmstep, ddplan.dmstep,
- ddplan.numsub, ddplan.dmsperpass, job.N/ddplan.downsamp,
- tmpdir, job.basefilenm, fil_filenm)
+ (maskfilenm, ddplan.lodm+passnum*ddplan.sub_dmstep,
+ ddplan.dmstep, ddplan.numsub,
+ ddplan.dmsperpass, job.N/ddplan.downsamp,
+ tmpdir, job.basefilenm, fits_filenm)
job.dedispersing_time += timed_execute(cmd)
# Iterate over all the new DMs
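For a concrete sense of the psrfits_subband call being built above, here is the command template filled in with hypothetical values:

    # Hypothetical values, mirroring the command template above
    downsamp, nchans = 2, 4096
    dsbase = "guppi_55812_GBNCC0123"
    print "psrfits_subband -dstime %d -nsub %d -o %s_DS%d %s" % \
          (downsamp, nchans, dsbase, downsamp, dsbase)
    # -> psrfits_subband -dstime 2 -nsub 4096 -o guppi_55812_GBNCC0123_DS2 guppi_55812_GBNCC0123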
@@ -329,7 +376,7 @@ def main(fil_filenm, workdir, ddplans):
except: pass
# Do the low-acceleration search
- cmd = "accelsearch -numharm %d -sigma %f -zmax %d -flo %f %s"%\
+ cmd = "accelsearch -harmpolish -numharm %d -sigma %f -zmax %d -flo %f %s"%\
(lo_accel_numharm, lo_accel_sigma, lo_accel_zmax, lo_accel_flo, fftnm)
job.lo_accelsearch_time += timed_execute(cmd)
try:
@@ -341,7 +388,7 @@ def main(fil_filenm, workdir, ddplans):
except: pass
# Do the high-acceleration search
- cmd = "accelsearch -numharm %d -sigma %f -zmax %d -flo %f %s"%\
+ cmd = "accelsearch -harmpolish -numharm %d -sigma %f -zmax %d -flo %f %s"%\
(hi_accel_numharm, hi_accel_sigma, hi_accel_zmax, hi_accel_flo, fftnm)
job.hi_accelsearch_time += timed_execute(cmd)
try:
@@ -398,17 +445,21 @@ def main(fil_filenm, workdir, ddplans):
if len(lo_accel_cands):
lo_accel_cands = sifting.remove_DM_problems(lo_accel_cands, numhits_to_fold,
dmstrs, low_DM_cutoff)
- if len(lo_accel_cands):
- lo_accel_cands.sort(sifting.cmp_sigma)
- sifting.write_candlist(lo_accel_cands,
- job.basefilenm+".accelcands_Z%d"%lo_accel_zmax)
hi_accel_cands = sifting.read_candidates(glob.glob("*ACCEL_%d"%hi_accel_zmax))
if len(hi_accel_cands):
hi_accel_cands = sifting.remove_duplicate_candidates(hi_accel_cands)
if len(hi_accel_cands):
hi_accel_cands = sifting.remove_DM_problems(hi_accel_cands, numhits_to_fold,
dmstrs, low_DM_cutoff)
+
+ if len(lo_accel_cands) and len(hi_accel_cands):
+ lo_accel_cands, hi_accel_cands = remove_crosslist_duplicate_candidates(lo_accel_cands, hi_accel_cands)
+
+ if len(lo_accel_cands):
+ lo_accel_cands.sort(sifting.cmp_sigma)
+ sifting.write_candlist(lo_accel_cands,
+ job.basefilenm+".accelcands_Z%d"%lo_accel_zmax)
if len(hi_accel_cands):
hi_accel_cands.sort(sifting.cmp_sigma)
sifting.write_candlist(hi_accel_cands,
@@ -480,11 +531,11 @@ def main(fil_filenm, workdir, ddplans):
os.remove(infile)
tf.close()
- # Remove all the downsampled .fil files
+ # Remove all the downsampled .fits files
- filfiles = glob.glob("*_DS?.fil") + glob.glob("*_DS??.fil")
- for filfile in filfiles:
- os.remove(filfile)
+    fitsfiles = glob.glob("*_DS?*.fits")   # one pattern matches 1- and 2-digit DS factors; avoids duplicate removes
+ for fitsfile in fitsfiles:
+ os.remove(fitsfile)
# Remove the tmp directory (in a tmpfs mount)
try:
@@ -516,7 +567,7 @@ def main(fil_filenm, workdir, ddplans):
#
# If there is <=1GB of RAM per CPU core, the following are preferred
#
- # For 4096slow chan data: lodm dmstep dms/call #calls #subs downsamp
+ # For 4096slow chan data: lodm dmstep dms/call #calls #subs downsamp
ddplans['4096slow'].append(dedisp_plan( 0.0, 0.02, 86, 81, 128, 1))
ddplans['4096slow'].append(dedisp_plan(139.32, 0.03, 102, 27, 128, 2))
ddplans['4096slow'].append(dedisp_plan(221.94, 0.05, 102, 33, 128, 4))
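The lodm values chain: each pass begins where the previous one ends, at lodm + dms/call * #calls * dmstep, which is easy to verify:

    # Each plan's start DM equals the previous plan's end DM
    print 0.0 + 86 * 81 * 0.02      # -> 139.32, lodm of the second plan
    print 139.32 + 102 * 27 * 0.03  # -> 221.94, lodm of the third plan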
@@ -547,10 +598,10 @@ def main(fil_filenm, workdir, ddplans):
# sys.argv[2] = working directory name
if len(sys.argv) >= 3:
workdir = sys.argv[2]
- fil_filenm = sys.argv[1]
- main(fil_filenm, workdir, ddplans)
+ fits_filenm = sys.argv[1]
+ main(fits_filenm, workdir, ddplans)
elif len(sys.argv) == 2:
- fil_filenm = sys.argv[1]
- main(fil_filenm, '.', ddplans)
+ fits_filenm = sys.argv[1]
+ main(fits_filenm, '.', ddplans)
else:
- print "GBT350_drift_search.py fil_filenm [workdir]"
+ print "GBNCC_search.py fits_filenm [workdir]"