In [1]:
import pandas as pd

In [2]:
fgk = pd.read_csv('fgk.txt', sep="\t")
fgk.head()

Unnamed: 0,star,group,ID,ID_alt,star_alt1,star_alt2,origin,snr,R,Rmax,...,e[Ti 1/H],n[Ti 1/H],[Ti 2/H],e[Ti 2/H],n[Ti 2/H],[V 1/H],e[V 1/H],n[V 1/H],n_spectra,flag
0,HIP101345,G Subgiant (IV),HD195564_HAR_1,HIP101345_HARPS_1,HD195564,-,HARPS,902,42000,115000,...,0.023,34,0.062,0.027,7,-0.121,0.024,27,2,-
1,HIP101345,G Subgiant (IV),HD195564_NAR_1,HIP101345_NARVAL_1,HD195564,-,NARVAL,369,42000,68000,...,0.023,34,0.062,0.027,7,-0.121,0.024,27,2,-
2,HIP10234,K Giant (III),HD13468_FER_1,HIP10234_FEROS_1,HD13468,-,FEROS,121,42000,48000,...,0.059,36,-0.221,0.042,8,-0.532,0.061,29,2,-
3,HIP10234,K Giant (III),HD13468_HAR_1,HIP10234_HARPS_1,HD13468,-,HARPS,95,42000,115000,...,0.059,36,-0.221,0.042,8,-0.532,0.061,29,2,-
4,HIP102422,K Subgiant (IV),HD198149_NAR_1,HIP102422_NARVAL_1,HD198149,-,NARVAL,908,42000,68000,...,0.062,35,-0.218,0.031,8,-0.447,0.047,30,2,-


In [3]:
fgk[fgk.origin=='HERMES'].head()

Unnamed: 0,star,group,ID,ID_alt,star_alt1,star_alt2,origin,snr,R,Rmax,...,e[Ti 1/H],n[Ti 1/H],[Ti 2/H],e[Ti 2/H],n[Ti 2/H],[V 1/H],e[V 1/H],n[V 1/H],n_spectra,flag
6,HIP103598,K Bright Giant (II),HD200205_HER_1,HIP103598_HERMES_1,HD200205,-,HERMES,389,42000,80000,...,0.081,16,-0.468,0.044,8,-0.486,0.136,22,2,-
20,HIP109937,K Bright Giant (II),HD211388_HER_1,HIP109937_HERMES_1,HD211388,-,HERMES,214,42000,75000,...,0.05,9,-0.028,0.061,6,0.041,0.176,19,2,-
23,HIP111944,K Giant (III),HD214868_HER_1,HIP111944_HERMES_1,HD214868,-,HERMES,348,42000,75000,...,0.083,22,-0.31,0.027,8,-0.474,0.124,23,2,-
29,HIP112731,K Giant (III),HD216174_HER_1,HIP112731_HERMES_1,HD216174,-,HERMES,218,42000,75000,...,0.108,28,-0.297,0.037,8,-0.601,0.099,27,2,-
56,HIP12530,F Dwarf (V),HD16765_HER_1,HIP12530_HERMES_1,HD16765,-,HERMES,94,42000,75000,...,0.096,11,-0.141,0.005,4,0.113,0.247,4,2,-


In [4]:
from __future__ import annotations

from pathlib import Path
import re
import requests

import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery.gaia import Gaia
import pyvo


DATACENTRAL_SSA_URL = "https://datacentral.org.au/vo/ssa/query"


def download_hermes_spectra(
    identifier: str,
    dr: str = "galah_dr3",
    outdir: str | Path = "hermes_data",
    radius_arcsec: float = 3.0,
    overwrite: bool = False,
    timeout: int = 60,
    maxrec: int = 1000,
    prefer_full_data: bool = True,
) -> list[Path]:
    """
    Download AAT/HERMES spectra served by Data Central (GALAH DR3/DR4) for a given identifier.

    Parameters
    ----------
    identifier : str
        Examples:
          - "HD 195564" or "HD195564"
          - "Gaia DR3 1234567890123456789"
          - "1234567890123456789"  (assumed Gaia DR3 source_id)
          - "Betelgeuse"
    dr : str
        Data Central SSA COLLECTION, e.g. "galah_dr3" or "galah_dr4".
    outdir : str | Path
        Output directory for FITS files.
    radius_arcsec : float
        Cone-search radius around resolved position.
    overwrite : bool
        Overwrite existing files.
    timeout : int
        HTTP timeout for downloads.
    maxrec : int
        Max number of SSA rows to return (Data Central defaults to 1000).
    prefer_full_data : bool
        Prefer downloading 'full_data_url' (original multi-extension file) when available.

    Returns
    -------
    list[Path]
        Paths to downloaded FITS files.
    """

    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    coord = _resolve_to_coord(identifier)
    size_deg = (radius_arcsec * u.arcsec).to(u.deg).value

    svc = pyvo.dal.SSAService(DATACENTRAL_SSA_URL)

    # SSA params supported by Data Central include COLLECTION and INSTRUMENT. :contentReference[oaicite:2]{index=2}
    results = svc.search(
        pos=(coord.ra.deg, coord.dec.deg),
        size=size_deg,
        COLLECTION=dr,
        INSTRUMENT="HERMES",
        MAXREC=maxrec,
    )

    if len(results) == 0:
        raise RuntimeError(
            f"No HERMES spectra found in {dr} within {radius_arcsec}\" of {identifier} "
            f"({coord.ra.deg:.6f}, {coord.dec.deg:.6f})."
        )

    tab = results.to_table()

    # Filter again defensively in case other instruments leak in
    tab = _filter_rows_containing(tab, needle="hermes")

    if len(tab) == 0:
        raise RuntimeError(
            f"SSA returned results near {identifier}, but none appear to be HERMES after filtering."
        )

    downloaded: list[Path] = []
    session = requests.Session()

    for row in tab:
        url = None
        if prefer_full_data:
            url = _first_present(row, ["full_data_url", "FULL_DATA_URL", "fullDataUrl"])
        if not url:
            url = _first_present(row, ["access_url", "ACCESS_URL", "acref", "ACREF"])

        if not url:
            continue

        fname = str(url).split("/")[-1]
        if not fname:
            continue

        outpath = outdir / fname
        if outpath.exists() and not overwrite:
            downloaded.append(outpath)
            continue

        r = session.get(str(url), timeout=timeout)
        r.raise_for_status()
        outpath.write_bytes(r.content)
        downloaded.append(outpath)

    if not downloaded:
        raise RuntimeError("No downloadable URLs found in SSA response (no access_url/full_data_url).")

    return downloaded


def _resolve_to_coord(identifier: str) -> SkyCoord:
    s = identifier.strip()

    # Long integer => Gaia DR3 source_id
    if re.fullmatch(r"\d{15,20}", s):
        return _gaia_sourceid_to_coord(s)

    m = re.fullmatch(r"Gaia\s*DR3\s*(\d{15,20})", s, flags=re.IGNORECASE)
    if m:
        return _gaia_sourceid_to_coord(m.group(1))

    # Name resolver (HD, Bayer, etc.)
    return SkyCoord.from_name(s, frame="icrs")


def _gaia_sourceid_to_coord(source_id: str) -> SkyCoord:
    job = Gaia.launch_job_async(
        f"SELECT ra, dec FROM gaiadr3.gaia_source WHERE source_id = {source_id}"
    )
    t = job.get_results()
    if len(t) == 0:
        raise ValueError(f"Gaia DR3 source_id {source_id} not found.")
    return SkyCoord(ra=float(t["ra"][0]) * u.deg, dec=float(t["dec"][0]) * u.deg, frame="icrs")


def _first_present(row, keys: list[str]):
    for k in keys:
        if k in row.colnames:
            v = row[k]
            if v is not None and str(v).lower() != "nan" and str(v).strip() != "":
                return v
    return None


def _filter_rows_containing(tab, needle: str):
    needle = needle.lower()
    # Choose some likely text columns; if none exist, return as-is.
    text_cols = [c for c in tab.colnames if tab[c].dtype.kind in ("U", "S", "O")]
    if not text_cols:
        return tab
    mask = []
    for row in tab:
        blob = " ".join(str(row[c]) for c in text_cols).lower()
        mask.append(needle in blob)
    return tab[mask]


Workaround solutions for the Gaia Archive issues following the infrastructure upgrade: https://www.cosmos.esa.int/web/gaia/news#WorkaroundArchive


In [7]:
files = download_hermes_spectra("Gaia DR3 1234567890123456789", outdir="HD200205_hermes", radius_arcsec=5)
print(len(files), files[:3])

INFO: Query finished. [astroquery.utils.tap.core]


ValueError: Gaia DR3 source_id 1234567890123456789 not found.