# LoTSS DR1
## PyBDSF radio properties

In [182]:
import pandas as pd
import numpy as np
from astropy.table import Table, join

#### CATALOGUES

In [183]:
# Helper used to load every FITS catalogue of this notebook
def read_fits(file):
    """Read the table stored in `file` with astropy and return it as a pandas DataFrame."""
    return Table.read(file).to_pandas()

In [184]:
# Cleaned catalogues (PyBDSF - optical associations over the HETDEX field),
# as written by PyBDSF_DR1_associations.ipynb:
#   gauss          <- Gaussian catalogue
#   pybdsf         <- cleaned PyBDSF catalogue
#   pybdsf_optical <- output table
gauss, pybdsf, pybdsf_optical = map(
    read_fits,
    ('gauss_cleaned.fits', 'pybdsf_cleaned.fits', 'output_table.fits'),
)

In [185]:
# Give the key column the same name used in the other catalogues,
# so that the tables can be merged on 'Source_Name'
pybdsf_optical = pybdsf_optical.rename(
    columns={'pybdsf_name': 'Source_Name'}
)

#### CREATING THE OUTPUT

In [186]:
# Number of rows of the Gaussian catalogue per source name,
# as a two-column table (Source_Name, n_gauss)
n_gauss = (gauss['Source_Name']
           .value_counts()
           .rename_axis('Source_Name')
           .reset_index(name='n_gauss'))

In [187]:
# Building the output table: for every selected association flag, the PyBDSF
# sources carrying that flag are merged with their number of Gaussians and
# written out with a compact flag (1, 2, 3, 4).
output_columns = ['Source_Name', 'Min', 'Maj', 'n_gauss', 'flag']

# Selecting sources with flags:
flag = 1, 4, 8, 16

# Flag written to the output for each selected flag (explicit version of the
# previous int(round(sqrt(flag))) shortcut: 1->1, 4->2, 8->3, 16->4)
new_flag_of = {1: 1, 4: 2, 8: 3, 16: 4}

# One dataframe per flag; they are concatenated once after the loop instead of
# appending inside it (DataFrame.append no longer exists in recent pandas, and
# appending to an empty frame turned the numeric columns into object dtype)
flag_tables = []

for i in flag:

    # Rows of the PyBDSF catalogue whose source has at least one entry with this flag
    names_with_flag = pybdsf_optical.loc[pybdsf_optical['flag'] == i, 'Source_Name']
    source_names = pybdsf[pybdsf['Source_Name'].isin(names_with_flag)]

    # Merging the 3 catalogues:
    # - gaussians,
    # - radio pybdsf properties,
    # - and pybdsf_optical associations
    # The merge brings in every pybdsf_optical row of these sources, whatever its flag.
    merged_cat = (source_names
                  .merge(n_gauss, on='Source_Name')
                  .merge(pybdsf_optical, on='Source_Name')
                  [output_columns])

    # Dealing with the deblended sources with flag 4 which also have a flag 12 associated
    if i == 4:
        # Selecting the PyBDSFs that share both flags
        shared_sources = merged_cat.loc[merged_cat['flag'] == 12, 'Source_Name']
        shared_sources_df = merged_cat[merged_cat['Source_Name'].isin(shared_sources)]
        # Dropping all their rows from the merged catalogue, then collapsing the
        # repeated rows left for each remaining source
        merged_cat = merged_cat.drop(shared_sources_df.index).drop_duplicates()
    # For the other flags it is a one-to-one relation, nothing else to do

    # Working on a copy of the merged catalogue to change the flags
    df = merged_cat.copy()
    # Giving flags 1,2,3,4 to each group
    df['flag'] = new_flag_of[i]

    flag_tables.append(df)

# Stacking all the sources into a single table with a fresh 0..N-1 index
output = pd.concat(flag_tables, ignore_index=True)

#### CONFIRMING THE NUMBER OF SOURCES

In [308]:
# Size of the output table, followed by the number of rows with each new flag
print ('total:', len(output))
new_flags = 1, 2, 3, 4
for new_flag in new_flags:
    has_flag = output['flag'] == new_flag
    print ('flag', new_flag, len(output[has_flag]))

('total:', 325568)
('flag', 1, 313161)
('flag', 2, 857)
('flag', 3, 9007)
('flag', 4, 2543)


In [311]:
# Distinct source names with flag 4, minus the 23 sources that are in common
# with flag 12: this is the number of deblended sources kept in the output
pybdsf_optical.loc[pybdsf_optical['flag'] == 4, 'Source_Name'].nunique() - 23

857

In [313]:
# Distinct source names in the associations table that have no row in the output
pybdsf_optical['Source_Name'].nunique() - len(output)

126

In [314]:
# Confirming where the difference comes from: subtracting the rows with
# flags 32, 3 and 10 and the distinct source names with flag 12
(pybdsf_optical['Source_Name'].nunique() - len(output)
 - sum(len(pybdsf_optical[pybdsf_optical['flag'] == other_flag])
       for other_flag in (32, 3, 10))
 - pybdsf_optical.loc[pybdsf_optical['flag'] == 12, 'Source_Name'].nunique())

0

#### EXPORTING THE RESULTS

In [324]:
# The flag and n_gauss columns have to be numeric before exporting
for column in ('flag', 'n_gauss'):
    output[column] = pd.to_numeric(output[column])
# Converting to an astropy table and writing it out (replacing any existing file)
output_cat = Table.from_pandas(output)
output_cat.write('output_radio.fits', overwrite=True)