In [28]:
import pandas as pd

In [29]:
fgk = pd.read_csv('fgk.txt', sep="\t")
fgk.head()

Unnamed: 0,star,group,ID,ID_alt,star_alt1,star_alt2,origin,snr,R,Rmax,...,e[Ti 1/H],n[Ti 1/H],[Ti 2/H],e[Ti 2/H],n[Ti 2/H],[V 1/H],e[V 1/H],n[V 1/H],n_spectra,flag
0,HIP101345,G Subgiant (IV),HD195564_HAR_1,HIP101345_HARPS_1,HD195564,-,HARPS,902,42000,115000,...,0.023,34,0.062,0.027,7,-0.121,0.024,27,2,-
1,HIP101345,G Subgiant (IV),HD195564_NAR_1,HIP101345_NARVAL_1,HD195564,-,NARVAL,369,42000,68000,...,0.023,34,0.062,0.027,7,-0.121,0.024,27,2,-
2,HIP10234,K Giant (III),HD13468_FER_1,HIP10234_FEROS_1,HD13468,-,FEROS,121,42000,48000,...,0.059,36,-0.221,0.042,8,-0.532,0.061,29,2,-
3,HIP10234,K Giant (III),HD13468_HAR_1,HIP10234_HARPS_1,HD13468,-,HARPS,95,42000,115000,...,0.059,36,-0.221,0.042,8,-0.532,0.061,29,2,-
4,HIP102422,K Subgiant (IV),HD198149_NAR_1,HIP102422_NARVAL_1,HD198149,-,NARVAL,908,42000,68000,...,0.062,35,-0.218,0.031,8,-0.447,0.047,30,2,-


In [30]:
fgk.origin.value_counts()

origin
ELODIE      113
NARVAL      104
HARPS        84
UVES         65
ESPADONS     58
HERMES       39
FEROS        35
FIES         17
CAFE          6
Name: count, dtype: int64

In [31]:
fgk[fgk.origin=='ESPADONS'].head()

Unnamed: 0,star,group,ID,ID_alt,star_alt1,star_alt2,origin,snr,R,Rmax,...,e[Ti 1/H],n[Ti 1/H],[Ti 2/H],e[Ti 2/H],n[Ti 2/H],[V 1/H],e[V 1/H],n[V 1/H],n_spectra,flag
7,HIP104214,K Dwarf (V),HD201091_ESP_1,HIP104214_ESPADONS_1,HD201091,61CygA,ESPADONS,968,42000,68000,...,0.133,37,-0.25,0.106,9,-0.249,0.078,47,3,-
9,HIP104217,K Dwarf (V),HD201092_ESP_1,HIP104217_ESPADONS_1,HD201092,61CygB,ESPADONS,729,42000,68000,...,0.084,28,-0.343,0.174,9,-0.46,0.078,43,3,-
26,HIP112447,F Subgiant (IV),HD215648_ESP_1,HIP112447_ESPADONS_1,HD215648,-,ESPADONS,500,42000,68000,...,0.024,51,-0.208,0.021,12,-0.209,0.046,38,3,-
35,HIP114622,K Dwarf (V),HD219134_ESP_1,HIP114622_ESPADONS_1,HD219134,-,ESPADONS,421,42000,68000,...,0.071,40,0.004,0.047,12,0.158,0.081,46,3,-
46,HIP116771,F Dwarf (V),HD222368_ESP_1,HIP116771_ESPADONS_1,HD222368,-,ESPADONS,500,42000,68000,...,0.018,76,-0.082,0.01,19,-0.165,0.035,61,5,-


In [32]:
from __future__ import annotations

import re
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery.gaia import Gaia
from astroquery.simbad import Simbad


def resolve_object_name(
    name: str,
    epoch: float = 2016.0,
) -> SkyCoord:
    """
    Resolve HIP / HD / Gaia identifiers to a SkyCoord.

    Parameters
    ----------
    name : str
        Object identifier (e.g. HIP12345, HD 224221, Gaia DR3 123...)
    epoch : float
        Reference epoch for Gaia positions (default: 2016.0)

    Returns
    -------
    SkyCoord
        ICRS coordinate with proper motion if available
    """

    name = name.strip()

    # ------------------------
    # Gaia source_id resolver
    # ------------------------
    gaia_match = re.search(r"(gaia\s*(dr2|edr3|dr3)?\s*)?(\d{17,19})", name, re.I)
    if gaia_match:
        source_id = gaia_match.group(3)

        query = f"""
        SELECT ra, dec, pmra, pmdec, parallax
        FROM gaiadr3.gaia_source
        WHERE source_id = {source_id}
        """

        job = Gaia.launch_job(query)
        r = job.get_results()[0]

        return SkyCoord(
            ra=r["ra"] * u.deg,
            dec=r["dec"] * u.deg,
            pm_ra_cosdec=r["pmra"] * u.mas / u.yr,
            pm_dec=r["pmdec"] * u.mas / u.yr,
            distance=None if r["parallax"] <= 0 else (1e3 / r["parallax"]) * u.pc,
            frame="icrs",
            obstime=f"J{epoch}",
        )

    # ------------------------
    # HIP / HD via SIMBAD
    # ------------------------
    custom = Simbad()
    custom.add_votable_fields(
        "pmra", "pmdec", "parallax", "ra(d)", "dec(d)"
    )

    result = custom.query_object(name)
    if result is None:
        raise ValueError(f"Could not resolve object name: {name}")

    return SkyCoord(
        ra=result["RA_d"][0] * u.deg,
        dec=result["DEC_d"][0] * u.deg,
        pm_ra_cosdec=result["PMRA"][0] * u.mas / u.yr if result["PMRA"][0] else None,
        pm_dec=result["PMDEC"][0] * u.mas / u.yr if result["PMDEC"][0] else None,
        distance=None
        if not result["PLX_VALUE"][0]
        else (1e3 / result["PLX_VALUE"][0]) * u.pc,
        frame="icrs",
        obstime="J2000",
    )


In [33]:
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import re
from pathlib import Path
from typing import Optional, Iterable

import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy.time import Time
from astroquery.gaia import Gaia
from astroquery.simbad import Simbad
import requests

CADC_TAP_SYNC = "https://ws.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/argus/sync"
CADC_RAVEN_FILES = "https://ws.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/raven/files/"

# --- ADQL template (from your editor selection) ---
ADQL_TEMPLATE = """
SELECT Observation.observationURI AS "Preview",
 Observation.collection AS "Collection",
 Observation.observationID AS "Obs. ID",
 Plane.productID AS "Product ID",
 COORD1 ( CENTROID ( Plane.position_bounds ) ) AS "RA (J2000.0)",
 COORD2 ( CENTROID ( Plane.position_bounds ) ) AS "Dec. (J2000.0)",
 Plane.time_bounds_lower AS "Start Date",
 Observation.instrument_name AS "Instrument",
 Plane.time_exposure AS "Int. Time",
 Observation.target_name AS "Target Name",
 Plane.energy_bandpassName AS "Filter",
 Plane.calibrationLevel AS "Cal. Lev.",
 Observation.type AS "Obs. Type",
 Plane.energy_bounds_lower AS "Min. Wavelength",
 Plane.energy_bounds_upper AS "Max. Wavelength",
 Observation.proposal_id AS "Proposal ID",
 Observation.proposal_pi AS "P.I. Name",
 Plane.dataRelease AS "Data Release",
 AREA ( Plane.position_bounds ) AS "Field of View",
 Plane.position_bounds AS "Shape",
 Plane.position_sampleSize AS "Pixel Scale",
 Plane.energy_resolvingPower AS "Resolving Power",
 Plane.time_bounds_upper AS "End Date",
 Plane.dataProductType AS "Data Type",
 Observation.target_moving AS "Moving Target",
 Plane.provenance_name AS "Provenance Name",
 Observation.intent AS "Intent",
 Observation.target_type AS "Target Type",
 Observation.target_standard AS "Target Standard",
 Observation.target_keywords AS "Target Keywords",
 Observation.sequenceNumber AS "Sequence Number",
 Observation.algorithm_name AS "Algorithm Name",
 Observation.proposal_title AS "Proposal Title",
 Observation.proposal_keywords AS "Proposal Keywords",
 Plane.position_resolution AS "IQ",
 Observation.instrument_keywords AS "Instrument Keywords",
 Observation.environment_tau AS "Tau",
 Plane.energy_transition_species AS "Molecule",
 Plane.energy_transition_transition AS "Transition",
 Observation.proposal_project AS "Proposal Project",
 Plane.energy_emBand AS "Band",
 Plane.provenance_version AS "Prov. Version",
 Plane.provenance_project AS "Prov. Project",
 Plane.provenance_runID AS "Prov. Run ID",
 Plane.provenance_lastExecuted AS "Prov. Last Executed",
 Plane.energy_restwav AS "Rest-frame Energy",
 Observation.requirements_flag AS "Quality",
 isDownloadable ( Plane.publisherID ) AS "DOWNLOADABLE",
 Plane.publisherID AS "Publisher ID"
FROM caom2.Plane AS Plane
JOIN caom2.Observation AS Observation
ON Plane.obsID = Observation.obsID
WHERE (
  INTERSECTS (
    CIRCLE ( 'ICRS', {ra_deg}, {dec_deg}, {radius_deg} ),
    Plane.position_bounds
  ) = 1
  AND Observation.instrument_name = 'ESPaDOnS'
  AND ( Plane.quality_flag IS NULL OR Plane.quality_flag != 'junk' )
)
"""

# -------------------------
# Name resolver (HIP / HD / Gaia)
# -------------------------
def resolve_name_to_skycoord(name: str) -> SkyCoord:
    s = name.strip()

    # Gaia: accept "Gaia DR3 123..." or raw 17-19 digit source_id
    m = re.search(r"(gaia\s*(dr2|edr3|dr3)?\s*)?(\d{17,19})", s, re.IGNORECASE)
    if m:
        source_id = m.group(3)
        query = f"""
        SELECT ra, dec, pmra, pmdec, parallax
        FROM gaiadr3.gaia_source
        WHERE source_id = {source_id}
        """
        tab = Gaia.launch_job(query).get_results()
        if len(tab) == 0:
            raise ValueError(f"Gaia source_id not found in DR3: {source_id}")
        r = tab[0]
        return SkyCoord(
            ra=r["ra"] * u.deg,
            dec=r["dec"] * u.deg,
            pm_ra_cosdec=(r["pmra"] * u.mas / u.yr) if r["pmra"] is not None else None,
            pm_dec=(r["pmdec"] * u.mas / u.yr) if r["pmdec"] is not None else None,
            distance=None if (r["parallax"] is None or r["parallax"] <= 0) else (1e3 / r["parallax"]) * u.pc,
            frame="icrs",
            obstime=Time("J2016.0"),
        )

    sim = Simbad()
    sim.add_votable_fields("ra(d)", "dec(d)", "pmra", "pmdec", "plx_value")
    res = sim.query_object(s)
    if res is None or len(res) == 0:
        raise ValueError(f"Could not resolve object name via SIMBAD: {name}")

    ra = res["ra"][0] * u.deg
    dec = res["dec"][0] * u.deg
    pmra = res["pmra"][0]
    pmdec = res["pmdec"][0]
    plx = res["plx_value"][0]

    return SkyCoord(
        ra=ra,
        dec=dec,
        pm_ra_cosdec=None if pmra is None else pmra * u.mas / u.yr,
        pm_dec=None if pmdec is None else pmdec * u.mas / u.yr,
        distance=None if (plx is None or plx <= 0) else (1e3 / plx) * u.pc,
        frame="icrs",
        obstime=Time("J2000.0"),
    )


# -------------------------
# CADC TAP query
# -------------------------
def build_adql(coord: SkyCoord, radius_arcsec: float) -> str:
    # Use J2000 for consistency with the UI output columns
    try:
        coord = coord.apply_space_motion(new_obstime=Time("J2000.0"))
    except Exception:
        pass
    radius_deg = (radius_arcsec * u.arcsec).to_value(u.deg)
    return ADQL_TEMPLATE.format(
        ra_deg=float(coord.ra.deg),
        dec_deg=float(coord.dec.deg),
        radius_deg=float(radius_deg),
    )


def tap_sync(adql: str, timeout: int = 120) -> Table:
    params = {
        "LANG": "ADQL",
        "REQUEST": "doQuery",
        "QUERY": adql,
        "FORMAT": "votable",
    }
    r = requests.get(CADC_TAP_SYNC, params=params, timeout=timeout)
    r.raise_for_status()
    from io import BytesIO
    return Table.read(BytesIO(r.content), format="votable")


# -------------------------
# FITS-only downloader
# -------------------------
def fits_only(pid: str) -> bool:
    """
    True only for plain .fits (NOT .fits.fz).
    """
    pid = pid.strip()
    return pid.lower().endswith(".fits") and not pid.lower().endswith(".fits.fz")


def iter_downloadable_publisher_ids(results: Table):
    """
    Yield publisherIDs for downloadable, plain .fits products.
    Robust against CADC VOTable column renaming.
    """

    # ---- locate publisherID column ----
    pid_col = None
    for c in results.colnames:
        if c.lower() in ("caompublisherid", "publisherid"):
            pid_col = c
            break

    if pid_col is None:
        raise KeyError(f"Could not find publisherID column. Got: {results.colnames}")

    # ---- locate DOWNLOADABLE column (optional) ----
    dl_col = None
    for c in results.colnames:
        if "downloadable" in c.lower():
            dl_col = c
            break

    for row in results:
        pid = str(row[pid_col]).strip()
        if not pid:
            continue

        # Respect isDownloadable if present
        if dl_col is not None:
            try:
                if int(row[dl_col]) != 1:
                    continue
            except Exception:
                pass

        # FITS only (exclude .fits.fz)
        pid_l = pid.lower()
        if pid_l.endswith(".fits") and not pid_l.endswith(".fits.fz"):
            yield pid



def download_pids(pids: Iterable[str], outdir: Path, overwrite: bool = False, timeout: int = 120) -> list[Path]:
    outdir.mkdir(parents=True, exist_ok=True)
    saved: list[Path] = []

    for pid in pids:
        url = CADC_RAVEN_FILES + pid  # e.g. .../raven/files/cadc:CFHT/922506i.fits
        fname = pid.replace("cadc:", "").replace("/", "_")
        path = outdir / fname

        if path.exists() and not overwrite:
            continue

        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)

        saved.append(path)

    return saved

In [36]:
from pathlib import Path
from astropy.table import Table
import requests

RAVEN_BASE = "https://ws.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/raven/files/"

def ivo_to_raven_fits(ivo_uri: str) -> str:
    """
    Convert:
      ivo://cadc.nrc.ca/CFHT?860907/860907i
    to:
      https://ws.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/raven/files/CFHT/860907/860907i.fits
    """
    if not ivo_uri.startswith("ivo://cadc.nrc.ca/"):
        raise ValueError(f"Unexpected IVO URI: {ivo_uri}")

    path = ivo_uri.replace("ivo://cadc.nrc.ca/", "")
    path = path.replace("?", "/")   # <-- THIS WAS THE MISSING STEP

    return f"{RAVEN_BASE}{path}.fits"


def download_from_downloadable_column(
    results: Table,
    outdir: Path,
    overwrite: bool = False,
    timeout: int = 120,
):
    outdir.mkdir(parents=True, exist_ok=True)

    # Locate the DOWNLOADABLE column (CADC mangles names)
    dl_col = None
    for c in results.colnames:
        if "downloadable" in c.lower():
            dl_col = c
            break

    if dl_col is None:
        raise KeyError(f"No DOWNLOADABLE column found. Got: {results.colnames}")

    downloaded = []

    for row in results:
        ivo = row[dl_col]
        if ivo is None:
            continue

        ivo = str(ivo).strip()
        if not ivo:
            continue

        url = ivo_to_raven_fits(ivo)
        fname = url.split("/")[-1]
        path = outdir / fname

        if path.exists() and not overwrite:
            continue

        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)

        downloaded.append(path)

    return downloaded


In [38]:
coord = resolve_name_to_skycoord("HIP104214")
adql = build_adql(coord, 5.0)

results = tap_sync(adql, timeout=10.0)
print(f"Query returned {len(results)} rows.")

files = download_from_downloadable_column(
    results,
    outdir=Path("espadons_fits"),
)

  sim.add_votable_fields("ra(d)", "dec(d)", "pmra", "pmdec", "plx_value")
  sim.add_votable_fields("ra(d)", "dec(d)", "pmra", "pmdec", "plx_value")


Query returned 27 rows.


HTTPError: 400 Client Error:  for url: https://ws.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/raven/files/CFHT/860907/860907i.fits