<a href="https://colab.research.google.com/github/sundarjhu/EscueladeVerano_2021/blob/main/Escuela_de_Verano_2021_Demo_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Accessing data from SIMBAD, demonstrating ADQL basics**
---



---



In [1]:
from astropy.table import Table, join
import numpy as np
!pip install pyvo
import pyvo as vo

Collecting pyvo
  Downloading pyvo-1.1-py3-none-any.whl (802 kB)
[?25l[K     |▍                               | 10 kB 23.6 MB/s eta 0:00:01[K     |▉                               | 20 kB 27.1 MB/s eta 0:00:01[K     |█▎                              | 30 kB 10.1 MB/s eta 0:00:01[K     |█▋                              | 40 kB 8.5 MB/s eta 0:00:01[K     |██                              | 51 kB 5.0 MB/s eta 0:00:01[K     |██▌                             | 61 kB 5.3 MB/s eta 0:00:01[K     |██▉                             | 71 kB 5.7 MB/s eta 0:00:01[K     |███▎                            | 81 kB 6.4 MB/s eta 0:00:01[K     |███▊                            | 92 kB 6.1 MB/s eta 0:00:01[K     |████                            | 102 kB 5.1 MB/s eta 0:00:01[K     |████▌                           | 112 kB 5.1 MB/s eta 0:00:01[K     |█████                           | 122 kB 5.1 MB/s eta 0:00:01[K     |█████▎                          | 133 kB 5.1 MB/s eta 0:00:01[K     |██

## Create a table of source identifiers, save to VOTable. This will serve as input for our queries.

In [2]:
id = ['lam Vel', 'IRAS 16205-2626', 'eta Car', 'alp Cen', 'IRC +10216'] #unique identifiers for three stars
hemisphere = ['Southern', 'Southern', 'Southern', 'Southern', 'Northern'] #which celestial hemisphere?
input_tab = Table([id, hemisphere], names = ('ID', 'hemisphere'))
input_tab.pprint()
input_tab.write('input_tab.vot', format = 'votable', overwrite = True)

       ID       hemisphere
--------------- ----------
        lam Vel   Southern
IRAS 16205-2626   Southern
        eta Car   Southern
        alp Cen   Southern
     IRC +10216   Northern


### Automatically obtain the url for the SIMBAD TAP server by doing a regsearch using PyVO

In [5]:
r = vo.regsearch(servicetype = 'tap', keywords = ['Simbad'])
SIMBAD_service = r[0].service



In [6]:
# This is the url to the TAP server
SIMBAD_service.baseurl

'http://simbad.u-strasbg.fr:80/simbad/sim-tap'

In [7]:
#Select all columns from the first row of basic data
query = """ SELECT top 1 * FROM basic """ # change "1" to any number to view that many rows in the output
results = SIMBAD_service.search(query).to_table() #submit the ADQL query to the SIMBAD TAP server, and convert the response into an astropy table
results.colnames
#You can use this to inspect the column names and their content in the table

['coo_bibcode',
 'coo_err_angle',
 'coo_err_maj',
 'coo_err_maj_prec',
 'coo_err_min',
 'coo_err_min_prec',
 'coo_qual',
 'coo_wavelength',
 'dec_prec',
 'galdim_angle',
 'galdim_bibcode',
 'galdim_majaxis',
 'galdim_majaxis_prec',
 'galdim_minaxis',
 'galdim_minaxis_prec',
 'galdim_qual',
 'hpx',
 'morph_bibcode',
 'morph_qual',
 'morph_type',
 'nbref',
 'oid',
 'otype',
 'plx_bibcode',
 'plx_err',
 'plx_err_prec',
 'plx_prec',
 'plx_qual',
 'plx_value',
 'pm_bibcode',
 'pm_err_angle',
 'pm_err_maj',
 'pm_err_maj_prec',
 'pm_err_min',
 'pm_err_min_prec',
 'pm_qual',
 'pmdec',
 'pmdec_prec',
 'pmra',
 'pmra_prec',
 'ra_prec',
 'rvz_bibcode',
 'rvz_err',
 'rvz_err_prec',
 'rvz_nature',
 'rvz_qual',
 'rvz_radvel',
 'rvz_radvel_prec',
 'rvz_redshift',
 'rvz_redshift_prec',
 'rvz_type',
 'sp_bibcode',
 'sp_qual',
 'sp_type',
 'update_date',
 'vlsr',
 'main_id',
 'otype_txt',
 'ra',
 'dec']

In [8]:
#How many rows are there in this table?
query = """ select count(coo_bibcode) as NUMROWS from basic"""
results = SIMBAD_service.search(query).to_table() #submit the ADQL query to the SIMBAD TAP server, and convert the response into an astropy table
results

numrows
int64
11898010


In [9]:
#HEY, waitaminit! I asked for the column to be in all caps, why isn't it?
#Because ADQL is case-insensitive unless you enclose the argument in quotes
query = """ select count(coo_bibcode) as "NUMROWS" from basic"""
results = SIMBAD_service.search(query).to_table() #submit the ADQL query to the SIMBAD TAP server, and convert the response into an astropy table
results

NUMROWS
int64
11898010


In [12]:
#SUBSETS with WHERE: How many rows are there in this table with DEC < 0?
query = """ select count(coo_bibcode) as NUMROWS from basic WHERE dec < 0"""
results = SIMBAD_service.search(query).to_table() #submit the ADQL query to the SIMBAD TAP server, and convert the response into an astropy table
results

query = """ select top 100 coo_bibcode as NUMROWS from basic WHERE dec < 0"""
results = SIMBAD_service.search(query).to_table() #submit the ADQL query to the SIMBAD TAP server, and convert the response into an astropy table
results


numrows
object
2018yCat.1345....0G
2018yCat.1345....0G
2018yCat.1345....0G
2018yCat.1345....0G
2018yCat.1345....0G
2018yCat.1345....0G
2018yCat.1345....0G
2018yCat.1345....0G
2018yCat.1345....0G
2018yCat.1345....0G


# There are three important tables in SIMBAD: `basic`, `ident`, and `ids`. 

# `basic` contains the basic information for each source (positions, velocities, photometry, redshift, etc.). If a source is known by many names, there is an entry for EACH such name recognized by SIMBAD.

# Each source in these tables is identified by a unique identifier, a long integer called *oid* (in `basic`) and *oidref* (in `ident`, `ids`). You can connect a source to its various names using these two columns.

# You can obtain a list of references for the sources in the same way using the tables `ref` and `has_ref`.

In [14]:
# Get some information from the basic table for the object with name 'CW Leo', which is NOT its standard SIMBAD name.
#
query = """
-- Basic data from an object given one of its identifiers.
SELECT basic.OID,
       RA,
       DEC,
       main_id AS "Main identifier",
       coo_bibcode AS "Coord Reference",
       nbref AS "NbReferences",
       plx_value as "Parallax",
       rvz_radvel as "Radial velocity",
       galdim_majaxis,
       galdim_minaxis,
       galdim_angle AS "Galaxy ellipse angle"
FROM basic JOIN ident ON oidref = oid
WHERE id = 'CW Leo'
"""
results = SIMBAD_service.search(query).to_table()
results

oid,ra,dec,Main identifier,Coord Reference,NbReferences,Parallax,Radial velocity,galdim_majaxis,galdim_minaxis,Galaxy ellipse angle
Unnamed: 0_level_1,deg,deg,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mas,km / s,arcmin,arcmin,deg
int64,float64,float64,object,object,int32,float64,float64,float32,float32,int16
1657507,146.989193,13.278768,IRC +10216,2003yCat.2246....0C,2200,10.79,--,--,--,--


### Get all the identifiers for one source

In [22]:
query = """select t0.main_id, t1.id, t2.ids from basic as t0, ident as t1, ids as t2 where t0.oid = t1.oidref and t1.id = 'CW Leo' and t1.oidref = t2.oidref"""
results = SIMBAD_service.search(query).to_table()
results

main_id,id,ids
object,object,object
M 13,M 13,C 1639+365|GCl 45|M 13|NGC 6205|2MASX J16414163+3627407|[KPS2012] MWSC 2445|NAME Hercules Globular Cluster


# How to upload a table!
### Get all the identifiers for a list of sources 

In [None]:
input_tab

In [None]:
query = """select mytable.id, t2.ids from mytable, ident as t1, ids as t2 where t1.id = mytable.ID and t1.oidref = t2.oidref"""
results = SIMBAD_service.search(query, uploads = {'mytable': 'input_tab.vot'}).to_table()
results

In [None]:
#Cone search
query = """
-- Display basic data about objects contained in a given circle and whose mag B < 9.0.
SELECT basic.OID,
       RA,
       DEC,
       main_id AS "Main identifier",
       coo_bibcode AS "BiblioReference",
       nbref AS "NbReferences",
       plx_value as "Parallax",
       rvz_radvel as "Radial velocity",
       galdim_majaxis,
       galdim_minaxis,
       galdim_angle AS "Galaxy ellipse angle"
FROM basic JOIN flux ON oidref = oid
WHERE filter = 'B'
      AND flux < 9.0
      AND CONTAINS(POINT('ICRS', RA, DEC), CIRCLE('ICRS', 10, 5, 1)) = 1
ORDER BY "Main identifier"
"""
results = SIMBAD_service.search(query).to_table()
results