# Analyzing the Distances Between Beams

Start off by importing any necessary packages. Check the README for the versions of each of the packages being used.

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import seaborn as sns
from tabulate import tabulate
from astropy.time import Time
from tqdm import tqdm
import csv
import math

Open up the .pickle files. The 4_beams.pkl file has all of the hits that are present in 4 of the beams in a given field of view. The same is true for 5_beams.pkl, except that each hit is present in 5 of the beams. However, there is a caveat here: all of the hits that were missing an incoherent beam were excluded. These were assumed to be low-intensity RFI that dropped out when the average was taken to create the incoherent beam, but that was still classified as a hit in 4-5 of the coherent beams.

In [36]:
def _load_hits(pickle_path):
    """Unpickle one hit table, print its shape, and hand it back."""
    with open(pickle_path, 'rb') as handle:
        table = pickle.load(handle)
    print(table.shape)
    return table


# For every table, also pull out the "file_path" column and its unique values.

# Pairs of coherent-beam hits
hit_2 = _load_hits('../../2_beams_coherent.pkl')
file_path_2 = hit_2["file_path"]
unique_file_path_2 = np.unique(file_path_2)  # the array of unique fields of view

# Hits present in 3 beams
hit_3 = _load_hits('../../3_beams.pkl')
file_path_3 = hit_3["file_path"]
unique_file_path_3 = np.unique(file_path_3)  # the array of unique fields of view

# Hits present in 4 beams
hit_4 = _load_hits('../../4_beams.pkl')
file_path_4 = hit_4["file_path"]
unique_file_path_4 = np.unique(file_path_4)  # the array of unique fields of view

# Hits present in 5 beams
hit_5 = _load_hits('../../5_beams.pkl')
file_path_5 = hit_5["file_path"]
unique_file_path_5 = np.unique(file_path_5)  # the array of unique fields of view

(45728, 24)
(38604, 24)
(73840, 24)
(181150, 24)


This data was taken at S band with the VLA in the B configuration. This would mean that the beam size should be 2.1 arcseconds. So here's my plan:

- Clump together each hit 
- Find the distances between each of the coordinates in RA and DEC
- Convert these distances to arcseconds

Now there are a few other things to consider here. The center of the incoherent beam is usually offset from where all of the coherent beams are, so the distances should be calculated between every pair of points except the incoherent beam, so that it does not skew the distances. We will then filter out any clump that has a distance greater than 6.3 arcseconds.

In [16]:
# Sanity check of the separation calculation on one pair of rows (labels 1 and 2)
# of the 5-beam table.
right_ascension = hit_5["ra"]
declination = hit_5["dec"]
beam = hit_5["beam"]

# Which beams the two rows belong to
print(beam[2], beam[1])

# Coordinate offsets between the two rows, scaled by pi*3600/180.
# NOTE(review): pi/180 is the degrees-to-radians factor and 3600 is the
# degrees-to-arcseconds factor; confirm the units of "ra"/"dec" so that this
# really yields arcseconds, and whether a cos(dec) term is needed for RA.
ra_offset = (right_ascension[1]-right_ascension[2])*math.pi*3600/180
dec_offset = (declination[1]-declination[2])*math.pi*3600/180

print(ra_offset)
print(dec_offset)
print(math.sqrt(ra_offset**2+dec_offset**2))  # straight-line separation

2.0 4.0
0.5621356454823611
-5.987875597742031
6.014204075183711


So I'm going to make a for loop that goes through all of the clumps, and then does the above calculation for each of the combinations, except for the incoherent beam. To do this, once I pull the clump, I'll sort it by the beam and then only look at the combinations that occur within beams 0-4. I will append all of the distances to a list for the specific clump. Then if any of the values in the list are greater than 6.3, the clump will not get appended to the dataframe.

In [31]:
# Keep only the 5-beam clumps whose compared positions all lie within 6.3 of
# each other, save them to a pickle, and print how many rows survive.

# Column order of the output table
output_columns = ["file_path",
                  "hit_file_enumeration",
                  "signal_frequency",
                  "signal_index",
                  "signal_driftSteps",
                  "signal_driftRate",
                  "signal_snr",
                  "signal_coarseChannel",
                  "signal_numTimesteps",
                  "signal_power",
                  "signal_incoherentPower",
                  "sourceName",
                  "fch1",
                  "foff",
                  "tstart",
                  "tsamp",
                  "ra",
                  "dec",
                  "telescopeId",
                  "numTimesteps",
                  "numChannels",
                  "coarseChannel",
                  "startChannel",
                  "beam"]

clump_size = 5        # consecutive rows of hit_5 that make up one clump
max_separation = 6.3  # 3 beams at a beam size of 2.1 arcseconds

# Surviving clumps are collected in a list and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the whole table for every clump.
kept_clumps = []

for i in tqdm(range(len(file_path_5) // clump_size)):
    # NOTE(review): .loc slices by label with both endpoints included, so this
    # assumes hit_5 carries a default 0..N-1 index -- confirm.
    first_row = i * clump_size
    data_fov_subset = hit_5.loc[first_row:first_row + clump_size - 1]

    # Sort by frequency and then by beam within the clump
    data_fov_subset = data_fov_subset.sort_values(by = ["signal_frequency", "beam"])

    right_ascension = np.array(data_fov_subset["ra"])
    declination = np.array(data_fov_subset["dec"])

    # The last sorted row is left out of the comparison.
    # NOTE(review): this presumes the incoherent beam is the row that sorts
    # last (highest beam number at equal signal_frequency) -- confirm.
    n_compared = len(right_ascension) - 1

    # Each unordered pair is visited once; the separation is symmetric, so the
    # maximum is the same as when looping over every ordered pair.
    distances = []
    for j in range(n_compared):
        for k in range(j + 1, n_compared):
            # NOTE(review): pi/180 is the degrees-to-radians factor and 3600
            # the degrees-to-arcseconds factor; confirm the units of
            # "ra"/"dec", and whether RA needs a cos(dec) term.
            ra_diff = (right_ascension[j]-right_ascension[k])*math.pi*3600/180
            dec_diff = (declination[j]-declination[k])*math.pi*3600/180
            distances.append(math.sqrt(ra_diff**2+dec_diff**2))

    # With fewer than two rows to compare there are no pairs; treat the
    # separation as zero instead of letting np.max fail on an empty list.
    max_distance = np.max(distances) if distances else 0.0

    # If even one separation reaches the limit, the whole clump is dropped
    if max_distance < max_separation:
        kept_clumps.append(data_fov_subset)

if kept_clumps:
    df2 = pd.concat(kept_clumps, ignore_index = True)
    # Listed columns first, in the listed order; any other columns follow
    extra_columns = [column for column in df2.columns if column not in output_columns]
    df2 = df2.reindex(columns = output_columns + extra_columns)
else:
    df2 = pd.DataFrame({column: [] for column in output_columns})

# Binary mode is required for pickle; the with-block closes the file
with open('../../beam_separation_5_hits.pkl', 'wb') as f:
    pickle.dump(df2, f)  # serialize the filtered dataframe

print(df2.shape)

100%|██████████| 36230/36230 [01:52<00:00, 322.40it/s]


(87350, 24)


So I've taken a look now to see the separations of the hits that are present in 5 beams. I'm going to repeat the process for the hits that did have an incoherent beam and were present in 4, 3, and 2 beams. I am ending each cell by saving the resulting dataframe to a .pickle file and printing out the shape so that I can see how many hits are remaining.

This cell is showing the code to find the spacing for the hits that were observed in four beams

In [32]:
# Keep only the 4-beam clumps whose compared positions all lie within 6.3 of
# each other, save them to a pickle, and print how many rows survive.

# Column order of the output table
output_columns = ["file_path",
                  "hit_file_enumeration",
                  "signal_frequency",
                  "signal_index",
                  "signal_driftSteps",
                  "signal_driftRate",
                  "signal_snr",
                  "signal_coarseChannel",
                  "signal_numTimesteps",
                  "signal_power",
                  "signal_incoherentPower",
                  "sourceName",
                  "fch1",
                  "foff",
                  "tstart",
                  "tsamp",
                  "ra",
                  "dec",
                  "telescopeId",
                  "numTimesteps",
                  "numChannels",
                  "coarseChannel",
                  "startChannel",
                  "beam"]

clump_size = 4        # consecutive rows of hit_4 that make up one clump
max_separation = 6.3  # 3 beams at a beam size of 2.1 arcseconds

# Surviving clumps are collected in a list and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the whole table for every clump.
kept_clumps = []

for i in tqdm(range(len(file_path_4) // clump_size)):
    # NOTE(review): .loc slices by label with both endpoints included, so this
    # assumes hit_4 carries a default 0..N-1 index -- confirm.
    first_row = i * clump_size
    data_fov_subset = hit_4.loc[first_row:first_row + clump_size - 1]

    # Sort by frequency and then by beam within the clump
    data_fov_subset = data_fov_subset.sort_values(by = ["signal_frequency", "beam"])

    right_ascension = np.array(data_fov_subset["ra"])
    declination = np.array(data_fov_subset["dec"])

    # The last sorted row is left out of the comparison.
    # NOTE(review): this presumes the incoherent beam is the row that sorts
    # last (highest beam number at equal signal_frequency) -- confirm.
    n_compared = len(right_ascension) - 1

    # Each unordered pair is visited once; the separation is symmetric, so the
    # maximum is the same as when looping over every ordered pair.
    distances = []
    for j in range(n_compared):
        for k in range(j + 1, n_compared):
            # NOTE(review): pi/180 is the degrees-to-radians factor and 3600
            # the degrees-to-arcseconds factor; confirm the units of
            # "ra"/"dec", and whether RA needs a cos(dec) term.
            ra_diff = (right_ascension[j]-right_ascension[k])*math.pi*3600/180
            dec_diff = (declination[j]-declination[k])*math.pi*3600/180
            distances.append(math.sqrt(ra_diff**2+dec_diff**2))

    # With fewer than two rows to compare there are no pairs; treat the
    # separation as zero instead of letting np.max fail on an empty list.
    max_distance = np.max(distances) if distances else 0.0

    # If even one separation reaches the limit, the whole clump is dropped
    if max_distance < max_separation:
        kept_clumps.append(data_fov_subset)

if kept_clumps:
    df2 = pd.concat(kept_clumps, ignore_index = True)
    # Listed columns first, in the listed order; any other columns follow
    extra_columns = [column for column in df2.columns if column not in output_columns]
    df2 = df2.reindex(columns = output_columns + extra_columns)
else:
    df2 = pd.DataFrame({column: [] for column in output_columns})

# Binary mode is required for pickle; the with-block closes the file
with open('../../beam_separation_4_hits.pkl', 'wb') as f:
    pickle.dump(df2, f)  # serialize the filtered dataframe

print(df2.shape)

100%|██████████| 18460/18460 [00:33<00:00, 546.10it/s]


(31544, 24)


Here are the hits that were observed in 3 beams. This only looks at the distance between the two coherent beams and keeps the hits where that distance is less than 6.3".

In [33]:
# Keep only the 3-beam clumps whose compared positions lie within 6.3 of each
# other, save them to a pickle, and print how many rows survive.

# Column order of the output table
output_columns = ["file_path",
                  "hit_file_enumeration",
                  "signal_frequency",
                  "signal_index",
                  "signal_driftSteps",
                  "signal_driftRate",
                  "signal_snr",
                  "signal_coarseChannel",
                  "signal_numTimesteps",
                  "signal_power",
                  "signal_incoherentPower",
                  "sourceName",
                  "fch1",
                  "foff",
                  "tstart",
                  "tsamp",
                  "ra",
                  "dec",
                  "telescopeId",
                  "numTimesteps",
                  "numChannels",
                  "coarseChannel",
                  "startChannel",
                  "beam"]

clump_size = 3        # consecutive rows of hit_3 that make up one clump
max_separation = 6.3  # 3 beams at a beam size of 2.1 arcseconds

# Surviving clumps are collected in a list and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the whole table for every clump.
kept_clumps = []

for i in tqdm(range(len(file_path_3) // clump_size)):
    # NOTE(review): .loc slices by label with both endpoints included, so this
    # assumes hit_3 carries a default 0..N-1 index -- confirm.
    first_row = i * clump_size
    data_fov_subset = hit_3.loc[first_row:first_row + clump_size - 1]

    # Sort by frequency and then by beam within the clump
    data_fov_subset = data_fov_subset.sort_values(by = ["signal_frequency", "beam"])

    right_ascension = np.array(data_fov_subset["ra"])
    declination = np.array(data_fov_subset["dec"])

    # The last sorted row is left out of the comparison.
    # NOTE(review): this presumes the incoherent beam is the row that sorts
    # last (highest beam number at equal signal_frequency) -- confirm.
    n_compared = len(right_ascension) - 1

    # Each unordered pair is visited once; the separation is symmetric, so the
    # maximum is the same as when looping over every ordered pair.
    distances = []
    for j in range(n_compared):
        for k in range(j + 1, n_compared):
            # NOTE(review): pi/180 is the degrees-to-radians factor and 3600
            # the degrees-to-arcseconds factor; confirm the units of
            # "ra"/"dec", and whether RA needs a cos(dec) term.
            ra_diff = (right_ascension[j]-right_ascension[k])*math.pi*3600/180
            dec_diff = (declination[j]-declination[k])*math.pi*3600/180
            distances.append(math.sqrt(ra_diff**2+dec_diff**2))

    # With fewer than two rows to compare there are no pairs; treat the
    # separation as zero instead of letting np.max fail on an empty list.
    max_distance = np.max(distances) if distances else 0.0

    # If even one separation reaches the limit, the whole clump is dropped
    if max_distance < max_separation:
        kept_clumps.append(data_fov_subset)

if kept_clumps:
    df2 = pd.concat(kept_clumps, ignore_index = True)
    # Listed columns first, in the listed order; any other columns follow
    extra_columns = [column for column in df2.columns if column not in output_columns]
    df2 = df2.reindex(columns = output_columns + extra_columns)
else:
    df2 = pd.DataFrame({column: [] for column in output_columns})

# Binary mode is required for pickle; the with-block closes the file
with open('../../beam_separation_3_hits.pkl', 'wb') as f:
    pickle.dump(df2, f)  # serialize the filtered dataframe

print(df2.shape)

100%|██████████| 12868/12868 [00:26<00:00, 491.39it/s]


(25734, 24)


This one is a little bit different than the previous three cells. Instead of having the pairs of an incoherent and a coherent beam, this has pairs of coherent beams where there was no corresponding incoherent beam for the hit. The idea for these hits is that it was a localized signal but at a very low intensity where it got washed out in the averaging to create the incoherent beam. If anything, these could be the more interesting things to take a look at. 

In [37]:
# Keep only the 2-beam clumps whose two positions lie within 6.3 of each
# other, save them to a pickle, and print how many rows survive.
# Unlike the cells that drop the last sorted row, every row of the clump is
# compared here.

# Column order of the output table
output_columns = ["file_path",
                  "hit_file_enumeration",
                  "signal_frequency",
                  "signal_index",
                  "signal_driftSteps",
                  "signal_driftRate",
                  "signal_snr",
                  "signal_coarseChannel",
                  "signal_numTimesteps",
                  "signal_power",
                  "signal_incoherentPower",
                  "sourceName",
                  "fch1",
                  "foff",
                  "tstart",
                  "tsamp",
                  "ra",
                  "dec",
                  "telescopeId",
                  "numTimesteps",
                  "numChannels",
                  "coarseChannel",
                  "startChannel",
                  "beam"]

clump_size = 2        # consecutive rows of hit_2 that make up one clump
max_separation = 6.3  # 3 beams at a beam size of 2.1 arcseconds

# Surviving clumps are collected in a list and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the whole table for every clump.
kept_clumps = []

for i in tqdm(range(len(file_path_2) // clump_size)):
    # NOTE(review): .loc slices by label with both endpoints included, so this
    # assumes hit_2 carries a default 0..N-1 index -- confirm.
    first_row = i * clump_size
    data_fov_subset = hit_2.loc[first_row:first_row + clump_size - 1]

    # Sort by frequency and then by beam within the clump
    data_fov_subset = data_fov_subset.sort_values(by = ["signal_frequency", "beam"])

    right_ascension = np.array(data_fov_subset["ra"])
    declination = np.array(data_fov_subset["dec"])

    # All rows take part in the comparison for this table
    n_compared = len(right_ascension)

    # Each unordered pair is visited once; the separation is symmetric, so the
    # maximum is the same as when looping over every ordered pair.
    distances = []
    for j in range(n_compared):
        for k in range(j + 1, n_compared):
            # NOTE(review): pi/180 is the degrees-to-radians factor and 3600
            # the degrees-to-arcseconds factor; confirm the units of
            # "ra"/"dec", and whether RA needs a cos(dec) term.
            ra_diff = (right_ascension[j]-right_ascension[k])*math.pi*3600/180
            dec_diff = (declination[j]-declination[k])*math.pi*3600/180
            distances.append(math.sqrt(ra_diff**2+dec_diff**2))

    # With fewer than two rows to compare there are no pairs; treat the
    # separation as zero instead of letting np.max fail on an empty list.
    max_distance = np.max(distances) if distances else 0.0

    # If the separation reaches the limit, the whole clump is dropped
    if max_distance < max_separation:
        kept_clumps.append(data_fov_subset)

if kept_clumps:
    df2 = pd.concat(kept_clumps, ignore_index = True)
    # Listed columns first, in the listed order; any other columns follow
    extra_columns = [column for column in df2.columns if column not in output_columns]
    df2 = df2.reindex(columns = output_columns + extra_columns)
else:
    df2 = pd.DataFrame({column: [] for column in output_columns})

# Binary mode is required for pickle; the with-block closes the file
with open('../../beam_separation_2_hits.pkl', 'wb') as f:
    pickle.dump(df2, f)  # serialize the filtered dataframe

print(df2.shape)

100%|██████████| 22864/22864 [00:54<00:00, 416.34it/s]


(37328, 24)
