In [1]:
import pandas as pd
from astroquery.simbad import Simbad
import os

## Utilities

In [2]:
catalog = "Gaia DR3"

def find_gaia_dr3(value: str):
    if catalog in value:
        index = value.index(catalog)
        return value[index + len(catalog) + 1:index + len(catalog) + 25].strip().split('|')[0].strip()
    else:
        return None

In [3]:
find_gaia_dr3(
    "BD+36  4320|GSC 02700-03701|2MASS J20551542+3644188|TIC 195262851|TYC 2700-3701-1|Gaia DR2 1870094426454523904|Gaia DR3 1870094426454523904|Gaia DR1 1870094422138445824")

'1870094426454523904'

In [7]:
simbad_handler = Simbad()
simbad_handler.ROW_LIMIT = 0
simbad_handler.TIMEOUT = 60 * 60
simbad_handler.reset_votable_fields()

simbad_handler.add_votable_fields(
    "otype",
    "typed_id",
    "id(Gaia)",
    "ids",
    "sptype",
    "otypes",
    "coordinates"
)

In [5]:
simbad_handler.list_votable_fields()

--NOTES--

1. The parameter filtername must correspond to an existing filter. Filters include: B,V,R,I,J,K.  They are checked by SIMBAD but not astroquery.simbad

2. Fields beginning with rvz display the data as it is in the database. Fields beginning with rv force the display as a radial velocity. Fields beginning with z force the display as a redshift

3. For each measurement catalog, the VOTable contains all fields of the first measurement. When applicable, the first measurement is the mean one. 

Available VOTABLE fields:

bibcodelist(y1-y2)
biblio
cel
cl.g
coo(opt)
coo_bibcode
coo_err_angle
coo_err_maja
coo_err_mina
coo_qual
coo_wavelength
coordinates
dec(opt)
dec_prec
diameter
dim
dim_angle
dim_bibcode
dim_incl
dim_majaxis
dim_minaxis
dim_qual
dim_wavelength
dimensions
distance
distance_result
einstein
fe_h
flux(filtername)
flux_bibcode(filtername)
flux_error(filtername)
flux_name(filtername)
flux_qual(filtername)
flux_system(filtername)
flux_unit(filtername)
fluxdata(filtername)

## Download Symbiotic Stars (Symbad)

In [5]:
query = "maintype='Sy*'"
result_table = simbad_handler.query_criteria(query)
df1 = result_table.to_pandas()
df_result = df1[['MAIN_ID', 'OTYPE', 'SP_TYPE', 'ID_Gaia', 'IDS', 'OTYPES']]
df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)
df_result = df_result.dropna(subset=['Gaia DR3'])

out_name = 'SY.csv'
out_dir = './symbad'
if not os.path.exists(out_dir):
    os.mkdir(out_dir)

fullname = os.path.join(out_dir, out_name)
df_result.to_csv(fullname, header=True, index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)


## Download Planetary Nebula (Symbad)

In [6]:
out_name = 'NP.csv'
out_dir = './symbad'
if not os.path.exists(out_dir):
    os.mkdir(out_dir)
fullname = os.path.join(out_dir, out_name)

queries = [
    "maintype='PN' & rah <= 12",
    "maintype='PN' & rah > 12"
]

result = []
for query in queries:
    result_table = simbad_handler.query_criteria(query)
    df1 = result_table.to_pandas()
    df_result = df1[['MAIN_ID', 'OTYPE', 'SP_TYPE', 'ID_Gaia', 'IDS', 'OTYPES']]
    df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)
    df_result = df_result.dropna(subset=['Gaia DR3'])
    result.append(df_result)

combined_pd = pd.concat(result)
combined_pd.to_csv(fullname, header=True, index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)


## Download Planetary Nebula Candidate (Symbad)

In [10]:
out_name = 'NP_Candidate.csv'
out_dir = './symbad'
if not os.path.exists(out_dir):
    os.mkdir(out_dir)
fullname = os.path.join(out_dir, out_name)

queries = [
    "maintype='PN?' & rah <= 12",
    "maintype='PN?' & rah > 12"
]

result = []
for query in queries:
    result_table = simbad_handler.query_criteria(query)
    df1 = result_table.to_pandas()
    df_result = df1[['MAIN_ID', 'OTYPE', 'SP_TYPE', 'ID_Gaia', 'IDS', 'OTYPES', 'RA', 'DEC']]
    df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)
    df_result = df_result.dropna(subset=['Gaia DR3'])
    result.append(df_result)

combined_pd = pd.concat(result)
combined_pd.to_csv(fullname, header=True, index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)


## Download Red Giants (Symbad)

In [13]:
out_name = 'RG.csv'
out_dir = './symbad'
if not os.path.exists(out_dir):
    os.mkdir(out_dir)
fullname = os.path.join(out_dir, out_name)

result = []
for i in range(0, 24):
    query = "maintype='RG*' & rah > {0} & rah <= {1}".format(i, i + 1)
    result_table = simbad_handler.query_criteria(query)
    print("finish", i + 1)
    df1 = result_table.to_pandas()
    df_result = df1[['MAIN_ID', 'OTYPE', 'SP_TYPE', 'ID_Gaia', 'IDS', 'OTYPES']]
    df_result['Gaia DR3'] = df_result.apply(lambda row: find_gaia_dr3(row['IDS']), axis=1)
    df_result = df_result.dropna(subset=['Gaia DR3'])
    result.append(df_result)

combined_pd = pd.concat(result)
combined_pd.to_csv(fullname, header=True, index=False)

KeyboardInterrupt: 