In [1]:
import os
from getpass import getpass

import pymysql
import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord
import pandas, alminer, glob
import matplotlib.pyplot as plt

# Load ALMiner Query Results and Ancillary Data

In this pilot pipeline, I don't have my final sample selected (nor would I like to try running on all of them yet!). For now, I'm grabbing the entire crossmatch and I'll test the pipeline with a small sample of "sure-thing" targets. As a result, the selection process is a bit inefficient and hard to read. This selection is an ad hoc approach and differs from how I normally select my sample, which will be done separately.

KNOWN ISSUES: 

 - When multiple observations of the same target meet a given selection, each observation is treated as a separate object with the same name.

In [2]:
# Load the ALMiner query object from the crossmatch between SDSS and the ALMA archive.
# The result is read from a pickle with pandas; the cells below index it by column name.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted location.
f = '/arc/projects/salvage/ALMAxmatch/ALminer_output/SDSS-ALMA-search-Jan15-total.pkl'
myquery = pandas.read_pickle(f)

In [3]:
# Pull the columns needed for the sample selection out of the ALMiner query,
# one standalone numpy array per column.

# resolution, largest scale, frequency coverage and pointing of each row
(res_query, mrs_query,
 min_freq_query, max_freq_query,
 ra_query, dec_query) = (
    np.array(myquery[column])
    for column in ('spatial_resolution', 'spatial_scale_max',
                   'min_freq_GHz', 'max_freq_GHz',
                   'RAJ2000', 'DEJ2000')
)

# array configuration, project and data-product bookkeeping
antennae, project, datatype, datarights, name = (
    np.array(myquery[column])
    for column in ('antenna_arrays', 'project_code', 'dataproduct_type',
                   'data_rights', 'ALMA_source_name')
)

# archive identifiers; proj is a second, independent copy of the project code
muid, guid, auid, proj = (
    np.array(myquery[column])
    for column in ('member_ous_uid', 'group_ous_uid', 'asdm_uid', 'project_code')
)

In [4]:
#Query MySQL for SDSS ancillary data

# Credentials are taken from the environment (SDSS_DB_USER / SDSS_DB_PASSWORD)
# instead of being stored in the notebook; if no password is set, prompt for it.
db_user = os.environ.get('SDSS_DB_USER', 'swilkinson')
db_password = os.environ.get('SDSS_DB_PASSWORD') or getpass(f'MySQL password for {db_user}: ')

db = pymysql.connect(host = 'lauca.phys.uvic.ca', database = 'sdss', user = db_user, password = db_password)
x = 'SELECT u.objID, u.ra, u.decl, u.total_mass_med, u.z_spec, u.petroR50_r FROM  dr7_uberuber u WHERE u.total_mass_med > 1 AND u.z_spec < 0.5 AND u.decl < 47'

# close the cursor and the connection even if the query fails
try:
    with db.cursor() as c:
        c.execute(x)
        db_data = c.fetchall()
finally:
    db.close()

# transpose queried table: data[k] is the k-th selected column as floats
data = np.array(db_data, dtype = float).T
# objID is read a second time as a string; the float copy in data[0] may not
# represent a long integer ID exactly
table_id = np.array(db_data, dtype = str).T[0]

In [5]:
# Name the SDSS columns of the transposed query table. Row 0 (objID) is skipped
# here; rows 1-5 follow the column order of the SELECT statement.
ra_sdss, dec_sdss, mass_sdss, z_sdss, rpetro_sdss = (data[row] for row in range(1, 6))

In [6]:
# assign ancillary data to appropriate ALMA observations
#
# Each ALMA row is matched to the SDSS row with the smallest positional offset.
# Every ALMA row gets exactly one entry in each *_query array, so the arrays
# stay index-aligned with ra_query / dec_query (later cells index all of them
# with the same i). Rows without an acceptable match are filled with
# placeholders ('' for objID, NaN for the numeric columns); a NaN redshift
# makes the later frequency comparisons False, so those rows drop out of the
# selection instead of shifting the alignment.
#
# NOTE(review): the offset is a flat sqrt(dRA^2 + dDec^2) with no cos(dec)
# factor on the RA term, and the tolerance of 1 is in the coordinate units
# (presumably degrees -- confirm). SkyCoord.match_to_catalog_sky would give a
# proper on-sky separation.

objID_query = []
z_query = []
mass_query = []
rpetro_query = []

for i in range(len(ra_query)):

    ra_q  = ra_query[i]
    dec_q = dec_query[i]

    diff = np.sqrt((ra_sdss-ra_q)**2 + (dec_sdss-dec_q)**2)

    # index of the closest SDSS row (first one on ties)
    nearest = np.argmin(diff)

    # written as "not <=" so that a NaN offset is also treated as a poor match
    if not (diff[nearest] <= 1):
        print('Poor match!')

        objID_query.append('')
        z_query.append(np.nan)
        mass_query.append(np.nan)
        rpetro_query.append(np.nan)

    else:

        objID_query.append(table_id[nearest])
        z_query.append(z_sdss[nearest])
        mass_query.append(mass_sdss[nearest])
        rpetro_query.append(rpetro_sdss[nearest])

z_query = np.array(z_query)
mass_query = np.array(mass_query)
rpetro_query = np.array(rpetro_query)
objID_query = np.array(objID_query)

In [7]:
# Build the parent sample: ALMA rows whose frequency coverage brackets the
# redshifted rf_12CO_10 line, whose data product is a cube, and whose data
# rights are public. For each kept row the matched SDSS values and the ALMA
# archive values are stored in parallel *_sample arrays.

rf_12CO_10 = 115.27120180 #GHz

# The SDSS-matched arrays are indexed with the same i as the ALMA columns, so
# they must have one entry per ALMA row; fail early with a clear message.
if len(z_query) != len(ra_query):
    raise ValueError(
        f'SDSS-matched arrays ({len(z_query)} rows) are not aligned with the '
        f'ALMA query ({len(ra_query)} rows)'
    )

objID_sample = []
z_sample = []
mass_sample = []
rpetro_sample = []

ra_sample = []
dec_sample = []
res_sample = []
mrs_sample = []

AL_sample = []
AC_sample = []
TP_sample = []

project_sample = []
year_sample = []
name_sample = []

muid_sample = []
guid_sample = []
auid_sample = []
proj_sample = []

for i in range(len(ra_query)):

    # observed frequency of the line at the redshift of the matched galaxy
    redshifted_line = rf_12CO_10/(1+z_query[i])

    if (min_freq_query[i] < redshifted_line) & (max_freq_query[i] > redshifted_line) & (datatype[i] == 'cube') & (datarights[i] == 'Public'):

        # values from SDSS (already matched to query)
        objID_sample.append(objID_query[i])
        z_sample.append(z_query[i])
        mass_sample.append(mass_query[i])
        rpetro_sample.append(rpetro_query[i])

        # values from ALMA archive
        ra_sample.append(ra_query[i])
        dec_sample.append(dec_query[i])
        res_sample.append(res_query[i])
        mrs_sample.append(mrs_query[i])

        # antenna_arrays is a space-separated list of '<pad>:<antenna>' tokens;
        # the first character after the colon is counted per type.
        # NOTE(review): presumably D = 12 m (DA/DV), C = 7 m (CM), P = total
        # power (PM) antennas -- confirm against the ALMA archive docs.
        antenna_codes = [token.split(':')[1][0] for token in antennae[i].split(' ') if len(token) > 0]
        AL_sample.append(antenna_codes.count('D'))
        AC_sample.append(antenna_codes.count('C'))
        TP_sample.append(antenna_codes.count('P'))

        project_sample.append(project[i])
        # the leading field of the project code (before the first '.') is the year
        year_sample.append(int(project[i].split('.')[0]))
        name_sample.append(name[i])

        muid_sample.append(muid[i])
        guid_sample.append(guid[i])
        auid_sample.append(auid[i])
        proj_sample.append(proj[i])


objID_sample = np.array(objID_sample)
z_sample = np.array(z_sample)
mass_sample = np.array(mass_sample)
rpetro_sample = np.array(rpetro_sample)

ra_sample = np.array(ra_sample)
dec_sample = np.array(dec_sample)
res_sample = np.array(res_sample)
mrs_sample = np.array(mrs_sample)

AL_sample = np.array(AL_sample)
AC_sample = np.array(AC_sample)
TP_sample = np.array(TP_sample)

project_sample = np.array(project_sample)
year_sample = np.array(year_sample)
name_sample = np.array(name_sample)

muid_sample = np.array(muid_sample)
guid_sample = np.array(guid_sample)
auid_sample = np.array(auid_sample)
proj_sample = np.array(proj_sample)

In [8]:
# number of ALMA rows that passed the line-coverage / cube / public cut
len(objID_sample)

383

In [9]:
# number of distinct SDSS objIDs among those rows (an objID can appear in several rows)
len(np.unique(objID_sample))

340

In [10]:
# Keep a row when (1) its project year is after 2013, (2) the smallest
# res_sample value among all rows sharing its objID is below 3, and (3) the
# largest mrs_sample value among those rows exceeds 4 x its rpetro_sample.
selection_mask = []

for idx, target in enumerate(objID_sample):

    # all rows that belong to the same SDSS object as this one
    same_target = objID_sample == target

    recent_enough = year_sample[idx] > 2013
    resolved = np.min(res_sample[same_target]) < 3
    scale_recovered = np.max(mrs_sample[same_target]) > (4 * rpetro_sample[idx])

    selection_mask.append(recent_enough & resolved & scale_recovered)

selection_mask = np.array(selection_mask)

# apply the same mask to every parallel array so they stay row-aligned
(objID_sample, z_sample, mass_sample, rpetro_sample,
 ra_sample, dec_sample, res_sample, mrs_sample,
 AL_sample, AC_sample, TP_sample,
 project_sample, year_sample, name_sample,
 muid_sample, guid_sample, auid_sample, proj_sample) = (
    column[selection_mask]
    for column in (objID_sample, z_sample, mass_sample, rpetro_sample,
                   ra_sample, dec_sample, res_sample, mrs_sample,
                   AL_sample, AC_sample, TP_sample,
                   project_sample, year_sample, name_sample,
                   muid_sample, guid_sample, auid_sample, proj_sample)
)

In [11]:
# number of rows left after applying selection_mask
len(objID_sample)

185

In [12]:
# number of distinct SDSS objIDs left after applying selection_mask
len(np.unique(objID_sample))

167

In [13]:
# Pilot sub-sample: rows whose project year is exactly 2018. The resolution
# and largest-scale conditions are evaluated again, this time over the rows
# that are still present for each objID.
pilot_mask = []

for i in range(len(objID_sample)):

    # all remaining rows that share this row's objID
    same_target = objID_sample == objID_sample[i]

    mask1 = year_sample[i]==2018

    mask2 = np.min(res_sample[same_target]) < 3

    mask3 = np.max(mrs_sample[same_target]) > (4 * rpetro_sample[i])

    pilot_mask.append(mask1 & mask2 & mask3)

pilot_mask = np.array(pilot_mask)

# Apply the mask to every parallel array, project_sample included, so that all
# of them keep the same length and row order.
(objID_sample, z_sample, mass_sample, rpetro_sample,
 ra_sample, dec_sample, res_sample, mrs_sample,
 AL_sample, AC_sample, TP_sample,
 project_sample, year_sample, name_sample,
 muid_sample, guid_sample, auid_sample, proj_sample) = (
    column[pilot_mask]
    for column in (objID_sample, z_sample, mass_sample, rpetro_sample,
                   ra_sample, dec_sample, res_sample, mrs_sample,
                   AL_sample, AC_sample, TP_sample,
                   project_sample, year_sample, name_sample,
                   muid_sample, guid_sample, auid_sample, proj_sample)
)

In [14]:
# number of rows left after applying pilot_mask
len(objID_sample)

26

In [15]:
# number of distinct SDSS objIDs left after applying pilot_mask
len(np.unique(objID_sample))

22

In [24]:
# Write the pilot sample to a space-delimited text file: a '#'-prefixed header
# naming the 17 columns, then one line per selected row.
# NOTE(review): fields are separated by single spaces, so a source name that
# contains a space would shift the columns -- confirm names are space-free.
fpath = '/arc/projects/salvage/ALMA_reduction/samples/'

# the context manager closes the file even if a write fails
with open(fpath + 'pilot_sample.txt', 'w') as out:

    # header lists every value written per row, in the same order
    out.write('#objID z mass rpetro ra dec res mrs AL AC TP year name muid guid auid proj\n')

    for i in range(len(objID_sample)):
        out.write(f'{objID_sample[i]} {z_sample[i]} {mass_sample[i]} {rpetro_sample[i]} {ra_sample[i]} {dec_sample[i]} {res_sample[i]} {mrs_sample[i]} {AL_sample[i]} {AC_sample[i]} {TP_sample[i]} {year_sample[i]} {name_sample[i]} {muid_sample[i]} {guid_sample[i]} {auid_sample[i]} {proj_sample[i]}\n')