In [94]:
import pandas as pd

In [95]:
fgk = pd.read_csv('fgk.txt', sep="\t")
fgk.head()

Unnamed: 0,star,group,ID,ID_alt,star_alt1,star_alt2,origin,snr,R,Rmax,...,e[Ti 1/H],n[Ti 1/H],[Ti 2/H],e[Ti 2/H],n[Ti 2/H],[V 1/H],e[V 1/H],n[V 1/H],n_spectra,flag
0,HIP101345,G Subgiant (IV),HD195564_HAR_1,HIP101345_HARPS_1,HD195564,-,HARPS,902,42000,115000,...,0.023,34,0.062,0.027,7,-0.121,0.024,27,2,-
1,HIP101345,G Subgiant (IV),HD195564_NAR_1,HIP101345_NARVAL_1,HD195564,-,NARVAL,369,42000,68000,...,0.023,34,0.062,0.027,7,-0.121,0.024,27,2,-
2,HIP10234,K Giant (III),HD13468_FER_1,HIP10234_FEROS_1,HD13468,-,FEROS,121,42000,48000,...,0.059,36,-0.221,0.042,8,-0.532,0.061,29,2,-
3,HIP10234,K Giant (III),HD13468_HAR_1,HIP10234_HARPS_1,HD13468,-,HARPS,95,42000,115000,...,0.059,36,-0.221,0.042,8,-0.532,0.061,29,2,-
4,HIP102422,K Subgiant (IV),HD198149_NAR_1,HIP102422_NARVAL_1,HD198149,-,NARVAL,908,42000,68000,...,0.062,35,-0.218,0.031,8,-0.447,0.047,30,2,-


In [96]:
fgk.origin.value_counts()

origin
ELODIE      113
NARVAL      104
HARPS        84
UVES         65
ESPADONS     58
HERMES       39
FEROS        35
FIES         17
CAFE          6
Name: count, dtype: int64

In [3]:
fgk[fgk.origin=='ELODIE'].head()

Unnamed: 0,star,group,ID,ID_alt,star_alt1,star_alt2,origin,snr,R,Rmax,...,e[Ti 1/H],n[Ti 1/H],[Ti 2/H],e[Ti 2/H],n[Ti 2/H],[V 1/H],e[V 1/H],n[V 1/H],n_spectra,flag
28,HIP112731,K Giant (III),HD216174_ELO_1,HIP112731_ELODIE_1,HD216174,-,ELODIE,204,42000,45000,...,0.108,28,-0.297,0.037,8,-0.601,0.099,27,2,-
39,HIP114971,K Giant (III),HD219615_ELO_1,HIP114971_ELODIE_1,HD219615,-,ELODIE,143,42000,45000,...,0.057,38,-0.233,0.034,8,-0.312,0.043,30,2,-
235,HIP57939,G Dwarf (V),HD103095_ELO_1,HIP57939_ELODIE_1,HD103095,Gmb1830,ELODIE,359,42000,45000,...,0.09,31,-1.024,0.078,8,-0.921,0.04,29,2,-
266,HIP69673,K Giant (III),HD124897_ELO_1,HIP69673_ELODIE_1,HD124897,Arcturus,ELODIE,902,42000,45000,...,0.07,53,-0.332,0.013,19,-0.445,0.072,57,5,-
343,HIP88348,G Dwarf (V),HD164922_ELO_1,HIP88348_ELODIE_1,HD164922,-,ELODIE,655,42000,45000,...,0.032,51,0.162,0.039,12,0.276,0.036,44,3,-


In [83]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, quote

BASE_URL = "http://atlas.obs-hp.fr/elodie/"

def fetch_search_page(obj_identifier):
    """
    Fetch the ELODIE search result page for a given object identifier.
    """
    # URL encode the object name
    obj_safe = quote(obj_identifier)
    url = f"{BASE_URL}fE.cgi?ob=objname,dataset,imanum&c=o&o={obj_safe}"
    resp = requests.get(url, timeout=15)
    resp.raise_for_status()
    return resp.text

def parse_links(html, base=BASE_URL):
    """
    Parse "spectra" and "CCF" related links from the returned HTML.
    """
    soup = BeautifulSoup(html, "html.parser")
    links = []
    for a in soup.select("a"):
        href = a.get("href")
        text = a.get_text(strip=True).lower()
        if not href:
            continue
        href_full = urljoin(base, href)
        # filter for useful links (FITS, actual data dumps, CCFs, etc.)
        if "ccf" in text or "spec" in text:
            links.append((text, href_full))
    return links

def get_spectra(obj_identifier):
    """
    Returns a list of (label, URL) for all spectra found for this object.
    """
    html = fetch_search_page(obj_identifier)
    links = parse_links(html)
    spectra = [url for (lbl, url) in links if ("get_spec" in lbl and "fits" in url) and "ccf" not in lbl]
    return spectra

def extract_search_ccf_links(html):
    """
    Extract links that point to 'search_ccf' pages.
    """
    soup = BeautifulSoup(html, "html.parser")
    links = []

    for a in soup.find_all("a", href=True):
        if "search_ccf" in a.get_text(strip=True).lower():
            links.append(urljoin(BASE_URL, a["href"]))

    return links

def extract_get_ccf_links(search_ccf_url):
    """
    Given a search_ccf page, extract 'get_ccf' download links.
    """
    html = fetch_html(search_ccf_url)
    soup = BeautifulSoup(html, "html.parser")

    get_ccf_links = []
    for a in soup.find_all("a", href=True):
        label = a.get_text(strip=True).lower()
        href = a["href"]
        if "get_ccf" in label and "html" not in href:
            get_ccf_links.append(urljoin(BASE_URL, href))
    return get_ccf_links

def fetch_html(url):
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    return r.text

def object_page_url(obj_identifier):
    obj_safe = quote(obj_identifier)
    return f"{BASE_URL}fE.cgi?ob=objname,dataset,imanum&c=o&o={obj_safe}"

def search_ccf(obj_identifier):
    """
    Full pipeline:
    object → search_ccf → get_ccf
    """
    obj_url = object_page_url(obj_identifier)
    html = fetch_html(obj_url)

    search_ccf_pages = extract_search_ccf_links(html)

    results = []
    for page in search_ccf_pages:
        results.extend(extract_get_ccf_links(page))

    return results

test_object = "HD224221"

print(f"Fetching spectra for {test_object} ...")
spectra_list = get_spectra(test_object)
for url in spectra_list:
    print("SPECTRUM:", url)

print(f"\nFetching CCF links for {test_object} ...")
ccf_list = search_ccf(test_object)

Fetching spectra for HD224221 ...
SPECTRUM: http://atlas.obs-hp.fr/elodie/fE.cgi?n=e500&c=i&z=s1d&a=mime:application/fits&o=elodie:19951220/0009

Fetching CCF links for HD224221 ...


In [87]:
for url in spectra_list:
    r = requests.get(url)
    with open(url.split("/")[-1], "wb") as f:
        f.write(r.content)

In [88]:
for url in ccf_list:
    r = requests.get(url)
    with open(url.split("/")[-1], "wb") as f:
        f.write(r.content)

In [90]:
from astropy.io import fits

ccf_filename = "ccf.fits"
hdulist = fits.open(ccf_filename)