In [1]:
# Install a pip package in the current Jupyter kernel
import sys

### General
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from astropy.io import fits
from astropy.table import Table, vstack
from tqdm import tqdm
import pandas as pd

### Query
from astroquery.sdss import SDSS
from astroquery.gaia import Gaia

In [2]:
hdul = fits.open('../external-dat/binaries/all_columns_catalog.fits')

df = pd.DataFrame(hdul[1].data)

In [3]:
from astropy.table import join

catalog = df[ df['binary_type'] == 'WDMS']
catalog.reset_index(inplace=True, drop=True)

In [4]:
ADQL_CODE1 = "SELECT \
    sdss.original_ext_source_id as bestobjid,\
    gaia_source.source_id\
    FROM gaiaedr3.gaia_source \
    JOIN gaiaedr3.sdssdr13_best_neighbour as sdss\
    ON gaia_source.source_id = sdss.source_id      \
    WHERE gaia_source.source_id IN {}\
".format(tuple(catalog['source_id1']))
ADQL_CODE2 = "SELECT \
    sdss.original_ext_source_id as bestobjid,\
    gaia_source.source_id\
    FROM gaiaedr3.gaia_source \
    JOIN gaiaedr3.sdssdr13_best_neighbour as sdss\
    ON gaia_source.source_id = sdss.source_id      \
    WHERE gaia_source.source_id IN {}\
".format(tuple(catalog['source_id2']))

In [5]:
job1 = Gaia.launch_job_async(ADQL_CODE1,dump_to_file=False)
job2 = Gaia.launch_job_async(ADQL_CODE2,dump_to_file=False)

d1 = job1.get_results()
d2 = job2.get_results()

INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]


In [6]:
print(d1[:10])
print(len(d2))

     bestobjid         source_id    
------------------- ----------------
1237678439166378089  974274676969216
1237678619047624731 1243758105176192
1237678619584561212 1350719970564224
1237678621710286999 1727337062828928
1237673328681681074 2469988447985792
1237678622245912818 4719932835786752
1237670014587109532 5474610129189376
1237678623856263295 5815871050683904
1237678623855870056 5862011883935744
1237667227152744751 5870056358125440
6391


In [7]:
drops = []
bestobjid1 = []
bestobjid2 = []

for i in tqdm (range(len(catalog))):
    notfound = False
    a = np.where(d1['source_id'] == catalog['source_id1'][i])
    b = np.where(d2['source_id'] == catalog['source_id2'][i])
    
    try:
        j = a[0][0]
        k = b[0][0]
    except:
        notfound = True
        
    if not notfound: 
        try:
            bestobjid1.append(d1['bestobjid'][j])
            bestobjid2.append(d2['bestobjid'][k])
        except:
            notfound = True
    if notfound:
        drops.append(i)    

100%|██████████████████████████████████████████████████████████████████████████| 22563/22563 [00:15<00:00, 1425.31it/s]


In [8]:
catalog = catalog.drop(drops)
catalog['bestobjid1'] = bestobjid1
catalog['bestobjid2'] = bestobjid2
catalog.reset_index(inplace=True, drop=True)
tcatalog1 = catalog

In [9]:
SDSS_CODE1 = """select sp.bestObjID, sp.specObjID, sp.plate, sp.fiberID, sp.mjd, sp.subclass, ph.u, ph.g, ph.r, ph.i, ph.z
    from dbo.SpecObjAll as sp
    join dbo.PhotoObjAll as ph 
    on sp.bestObjID = ph.objID
    where sp.bestObjID > 1237648702985666868
    and sp.bestObjID < 1237660529738105365
    and sp.class = 'STAR'"""
SDSS_CODE2 = """select sp.bestObjID, sp.specObjID, sp.plate, sp.fiberID, sp.mjd, sp.subclass, ph.u, ph.g, ph.r, ph.i, ph.z
    from dbo.SpecObjAll as sp
    join dbo.PhotoObjAll as ph 
    on ph.objID = sp.bestObjID
    where sp.bestObjID > 1237660529738105365
    and sp.bestObjID < 1237670529738105366
    and sp.class = 'STAR'"""
SDSS_CODE3 = """select sp.bestObjID, sp.specObjID, sp.plate, sp.fiberID, sp.mjd, sp.subclass, ph.u, ph.g, ph.r, ph.i, ph.z
    from dbo.SpecObjAll as sp
    join dbo.PhotoObjAll as ph 
    on ph.objID = sp.bestObjID
    where sp.bestObjID > 1237670529738105366
    and sp.bestObjID < 1237680529738105576
    and sp.class = 'STAR'"""

In [10]:
convert1= SDSS.query_sql(SDSS_CODE1)
convert2= SDSS.query_sql(SDSS_CODE2)
convert3= SDSS.query_sql(SDSS_CODE3)

print(convert1['subclass'][:10])

  arr = np.atleast_1d(np.genfromtxt(io.BytesIO(response.content),


   subclass   
--------------
            B6
 A4p (G_37-26)
            F5
            F9
            A0
            F9
            CV
M5III (221615)
            K1
            F9


In [11]:
twide_convert = vstack([convert1, convert2, convert3])
wide_convert = twide_convert
print(wide_convert['subclass'])

   subclass   
--------------
            B6
 A4p (G_37-26)
            F5
            F9
            A0
            F9
            CV
M5III (221615)
            K1
            F9
           ...
F3/F5V (30743)
   F2V (33256)
F3/F5V (30743)
   K0V (10780)
F3/F5V (30743)
    G0 (63791)
M5III (221615)
 M3III (44478)
 K5Ve (118100)
F3/F5V (30743)
     CalciumWD
Length = 993190 rows


In [12]:
def search(bestObjID):
    sourceobjid = []
    plate = []
    mjd = []
    fiberid = []
    sclass = []
    u = []
    g = []
    r = []
    i = []
    z = []
    
    url = []
    
    drops = []
    
    for i in tqdm(range(len(bestObjID))):
        notfound = False
        j = np.where(( bestObjID[i] == wide_convert['bestObjID'] ))
        
        try:
            k = j[0][0]
        except:
            notfound = True
            
        if not notfound: 
            try:
                sourceobjid.append(wide_convert['specObjID'][k])
                plate.append(wide_convert['plate'][k])
                mjd.append(wide_convert['mjd'][k])
                fiberid.append(wide_convert['fiberID'][k])
                try:
                    sclass.append(wide_convert['subclass'][k])
                except:
                    pass
                #u.append(wide_convert['u'][k])
                #g.append(wide_convert['g'][k])
                #r.append(wide_convert['r'][k])
                #i.append(wide_convert['i'][k])
                #z.append(wide_convert['z'][k])
            except:
                notfound = True
        if notfound:
            drops.append(i)  
            
    for i in tqdm(range(len(sourceobjid))):
        xid = SDSS.get_spectra_async(plate=plate[i], fiberID=fiberid[i], mjd=mjd[i])
        url.append(str(xid[0]).split(' ')[4])
    
    
    return drops, sourceobjid, plate, mjd, fiberid, url, u, g, r,i,z, sclass

drops1, sourceobjid1, plate1, mjd1, fiberID1, url1 ,u1,g1,r1,i1,z1, subclass1= search( catalog['bestobjid1'] )


100%|█████████████████████████████████████████████████████████████████████████████| 6291/6291 [00:14<00:00, 444.62it/s]
  arr = np.atleast_1d(np.genfromtxt(io.BytesIO(response.content),
100%|███████████████████████████████████████████████████████████████████████████████| 175/175 [00:00<00:00, 519.12it/s]


In [13]:
print(u1)
print(z1)

[]
[]


In [14]:
print(len(catalog))
print(len(drops))

catalog = catalog.drop(drops1)

catalog['specobjid1'] = sourceobjid1
catalog['plate1'] = plate1
catalog['mjd1'] = mjd1
catalog['fiberID1'] = fiberID1
catalog['url1'] = url1
catalog['subclass1'] = subclass1
#catalog['u1'] = u1
#catalog['g1'] = g1
#catalog['r1'] = r1
#catalog['i1'] = i1
#catalog['z1'] = z1

'''

catalog['specbjid2'] = sourceobjid2
catalog['plate2'] = plate2
catalog['mjd2'] = mjd2
catalog['fiberID2'] = fiberID2
catalog['url2'] = url2
'''
catalog.reset_index(inplace=True, drop=True)

6291
16272


In [15]:
print(len(catalog))

drops2, sourceobjid2, plate2, mjd2, fiberID2, url2, u2,g2,r2,i2,z2, subclass2 = search( catalog['bestobjid2'] )

175


100%|███████████████████████████████████████████████████████████████████████████████| 175/175 [00:00<00:00, 448.11it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 804.66it/s]


In [16]:
catalog = catalog.drop(drops2)

catalog['specobjid2'] = sourceobjid2
catalog['plate2'] = plate2
catalog['mjd2'] = mjd2
catalog['fiberID2'] = fiberID2
catalog['url2'] = url2
catalog['subclass2'] = subclass2
#catalog['u2'] = u2
#catalog['g2'] = g2
#catalog['r2'] = r2
#catalog['i2'] = i2
#catalog['z2'] = z2

catalog.reset_index(inplace=True, drop=True)

In [17]:
print(catalog)

           solution_id1         solution_id2           source_id1  \
0   1636042515805110273  1636042515805110273   743097619303531904   
1   1636042515805110273  1636042515805110273   617887567299257600   
2   1636042515805110273  1636042515805110273  3846979117283617152   
3   1636042515805110273  1636042515805110273  2534148150521389440   
4   1636042515805110273  1636042515805110273  2534260777448123520   
5   1636042515805110273  1636042515805110273  1261421999231535616   
6   1636042515805110273  1636042515805110273  2688215052700206976   
7   1636042515805110273  1636042515805110273  4030722598505336192   
8   1636042515805110273  1636042515805110273  3245310308566177536   
9   1636042515805110273  1636042515805110273  3264871552432918528   
10  1636042515805110273  1636042515805110273  3928724924885805568   
11  1636042515805110273  1636042515805110273   676167219784728576   
12  1636042515805110273  1636042515805110273  3671977861604162048   
13  1636042515805110273  163604251

In [18]:
catalog.to_csv('data/catalog_sdss4.csv')