# <font color='orange'>Query the ESO Science Archive for raw data and their associated nightlogs and raw calibrations</font>

## Initialisations

In [None]:
# Base URL of the ESO TAP service queried for raw data.
# HTTPS is used because authenticated requests carry a bearer token in the
# Authorization header, which must not travel over plain HTTP.
TAP_URL = "https://archive.eso.org/tap_obs"
# Base URL of the SIMBAD TAP service, queried for target coordinates.
TAP_URL_SIMBAD = 'https://simbad.unistra.fr/simbad/sim-tap'

# Importing useful packages
import os 
import sys
import requests
import json
import time
import getpass
from pathlib import Path
from astropy.table import Table, vstack, unique
from astropy.io import fits
import numpy as np
import glob

import ipywidgets as widgets
from IPython.display import display, clear_output

import pyvo
from pyvo.dal import tap
from pyvo.auth.authsession import AuthSession
    
# Refuse to continue with a pyvo release this notebook does not support:
# only version 1.1, or anything newer than 1.2.1, passes the check.
from packaging.version import parse as parse_version
pyvo_version = parse_version(pyvo.__version__)
test_pyvo_version = (
    pyvo_version > parse_version('1.2.1')
    or pyvo_version == parse_version('1.1')
)
if not test_pyvo_version:
    print('You are using a not supported version of pyvo (version={version}).\nPlease use pyvo v1.1, v1.3, or higher, not v1.2* [ref. pyvo github issue #298]'.format(version=pyvo.__version__))
    raise ImportError('The pyvo version you are using is not supported, use 1.3+ or 1.1.')

import utilities
import eso_programmatic as eso

In [None]:
def get_files_by_category(directory_path, category, file_pattern="*.fits", match_type='equal'):
    """Return the base names of local FITS files selected by their DPR category.

    Every file in ``directory_path`` matching ``file_pattern`` is opened and
    the ``HIERARCH ESO DPR CATG`` keyword of its primary header is compared
    with ``category``.

    Parameters
    ----------
    directory_path : str
        Directory to scan (only the directory itself, via ``glob``).
    category : str
        Value the header keyword is compared against, e.g. ``'SCIENCE'``.
    file_pattern : str, optional
        Glob pattern of the files to inspect.
    match_type : str, optional
        ``'equal'`` keeps the files whose keyword equals ``category``; any
        other value keeps the files whose keyword differs from it, which
        includes files that lack the keyword altogether.

    Returns
    -------
    list of str
        File names without their directory and with ``'.fits'`` removed.

    Notes
    -----
    Files whose header cannot be read are reported on stdout and skipped.
    """
    keep_equal = match_type == 'equal'

    out_files = []
    for file_path in glob.glob(os.path.join(directory_path, file_pattern)):
        try:
            with fits.open(file_path) as hdul:
                header_value = hdul[0].header.get('HIERARCH ESO DPR CATG')
        except Exception as exc:
            # Best effort: one unreadable file must not stop the scan.
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate, and the message
            # makes the skip visible instead of silent.
            print('Skipping %s: %s' % (file_path, exc))
            continue

        if keep_equal:
            matches = header_value == category
        else:
            matches = header_value != category

        if matches:
            # os.path.basename handles the platform's path separator,
            # unlike splitting on '/'.
            out_files.append(os.path.basename(file_path).replace('.fits', ''))

    return out_files


def remove_partial_matches(table, column_name, partial_strings):
    """Drop the rows whose ``column_name`` value contains any given substring.

    Parameters
    ----------
    table
        An astropy ``Table``, or any object that supports
        ``table[column_name]`` and ``to_table()``.
    column_name : str
        Column whose values (converted to ``str``) are searched.
    partial_strings : iterable of str
        Substrings; a row is removed as soon as one of them occurs in it.

    Returns
    -------
    The remaining rows, taken from ``table`` itself when it is a ``Table``
    and from ``table.to_table()`` otherwise.
    """
    values = np.asarray(table[column_name], dtype=str)

    # Accumulate the rows to drop; nothing is dropped to begin with.
    drop_mask = np.zeros(len(values), dtype=bool)
    for fragment in partial_strings:
        # np.char.find gives -1 where the fragment does not occur
        found = np.char.find(values, fragment) != -1
        drop_mask = drop_mask | found

    rows = table if isinstance(table, Table) else table.to_table()
    return rows[~drop_mask]

## Prompt for user's credentials and get a token for authenticated access

In [None]:
# Ask for the ESO credentials; an empty username selects anonymous access.
username = input("Type your ESO username (blank for unauthenticated): ")
if username:
    password = getpass.getpass(prompt="%s's password: " % (username), stream=None)

    token = eso.getToken(username, password)
    if token is None:
        # No token was returned, so authenticated access is impossible: stop here.
        sys.exit(-1)

    # The token is a bearer credential: it is deliberately not echoed, so that
    # it does not end up stored in the notebook's saved output.
    print('Authentication successful: token obtained')
    session = requests.Session()
    session.headers['Authorization'] = "Bearer " + token

    # NOTE: this rebinds the name `tap` imported from pyvo.dal at the top of
    # the notebook; from here on `tap` is the TAP service object.
    tap = pyvo.dal.TAPService(TAP_URL, session=session)
else:
    print('OK, no proprietary data then ...')
    token = ''
    # Anonymous service: `session` is not defined in this branch, which is why
    # later cells test `username` before using it.
    tap = pyvo.dal.TAPService(TAP_URL)

## Query to the ESO archive ...
####   Define some typical queries

### &nbsp;&nbsp;&nbsp;&nbsp; ... positional query with Simbad name resolution

In [None]:
tap_simbad = pyvo.dal.TAPService(TAP_URL_SIMBAD)

target = 'SN1987A'

# Look up the coordinates of the target by its Simbad main identifier.
# Single quotes in the name are doubled (the ADQL escape) so that the name
# cannot terminate the string literal early.
query_simbad = """
SELECT ra, dec, main_id
  FROM basic
  WHERE main_id = '""" + target.replace("'", "''") + """'
"""

simbad = utilities.run_tap_query(tap_simbad, query_simbad)
# Fail with an explicit message instead of an indexing error further down
# when the name did not match any row.
if simbad is None or len(simbad) == 0:
    raise ValueError("Target '%s' could not be resolved: no Simbad row has this main_id" % target)
ra = str(simbad['ra'].data[0])
dec = str(simbad['dec'].data[0])

# Raw UVES science frames whose footprint intersects a circle around the
# resolved position; only the first two matches are requested (TOP 2).
query = """
SELECT TOP 2 *
FROM dbo.raw
WHERE dp_cat = 'SCIENCE'
AND instrument = 'UVES'
AND  INTERSECTS(s_region, CIRCLE('ICRS', """ + ra + """, """ + dec + """, 100./3600.)) = 1
                           -- a circle of radius=100 arcsec,
AND date_obs > '2015'
"""

### &nbsp;&nbsp;&nbsp;&nbsp; ... query by programme ID

In [None]:
# Programme ID to search for. The LIKE pattern below appends '%', so the value
# is matched as a prefix; the empty string therefore matches every programme.
prog_id = ''

# Raw science frames of the selected programme(s).
# Running this cell replaces any `query` defined by a previously run cell.
query = """
SELECT * from dbo.raw
where prog_id LIKE '""" + prog_id + """%'
and dp_cat='SCIENCE'
"""

### &nbsp;&nbsp;&nbsp;&nbsp; ... query by PI name

In [None]:
# Name to search for in the pi_coi column. The LIKE pattern below appends '%',
# so the value is matched as a prefix; the empty string matches every row.
pi_name = ''

# Raw UVES science frames for the selected name.
# Running this cell replaces any `query` defined by a previously run cell.
query = """
SELECT * from dbo.raw
where pi_coi LIKE '""" + pi_name + """%'
and dp_cat='SCIENCE'
and instrument='UVES'
"""

## Run the query

In [None]:
# Execute whichever `query` was defined last, using the (possibly
# authenticated) ESO TAP service.
results = utilities.run_tap_query(tap, query)

if results:
    print('%i matching files found' % len(results))
    # Uncomment to print every record in full:
    # eso.printTableTransposedByTheRecord(results.to_table()) 
else:
    print("!" * 42)
    print("!                                        !")
    print("!       No results could be found.       !")
    print("!       ? Perhaps no permissions ?       !")
    print("!       Aborting here.                   !")
    print("!                                        !")
    print("!" * 42)
    # sys.exit rather than quit(): quit() is a helper of the `site` module
    # meant for interactive shells, and sys.exit matches how the
    # authentication cell aborts.
    sys.exit(-1)

## Download the selected raw science files ...

In [None]:
# Ask where the downloaded files should be saved.
# NOTE(review): TextSelector comes from the local `utilities` module; it is
# presumably a text-input widget pre-filled with `default_input` - confirm there.
selector = utilities.TextSelector(default_description='Save directory:', default_input='/home/user/EDPS_data/')
selector.display()

In [None]:
# Read the directory entered in the selector and create it, together with any
# missing parent directories; an already existing directory is not an error.
dirname = selector.get_input()
Path(dirname).mkdir(parents=True, exist_ok=True)

### &nbsp;&nbsp;&nbsp;&nbsp; ... if desired, first filter the results of the query to only the files that are not already on disk

In [None]:
# Names (without '.fits') of the files in `dirname` whose DPR CATG header
# keyword is 'SCIENCE'.
local_sci_files = get_files_by_category(dirname, 'SCIENCE')
# Drop every query result whose access_url contains one of those names, so
# that files already on disk are not downloaded again.
results = remove_partial_matches(results, 'access_url', local_sci_files)

### &nbsp;&nbsp;&nbsp;&nbsp; ... and now the actual download

In [None]:
print("Start downloading...")
n_results = len(results)
# The authenticated session is passed along only when a username was given.
session_kwargs = {'session': session} if username else {}
for i_raw, raw in enumerate(results):
    # the access_url is the link to the raw file
    access_url = raw['access_url']
    status, filepath = eso.downloadURL(access_url, dirname=dirname, **session_kwargs)
    if status != 200:
        # Report the failure in red and move on to the next file.
        print("\x1b[31m %4d/%d      ERROR RAW: %s NOT DOWNLOADED (http status:%d)\x1b[0m"  % (i_raw+1, n_results, filepath, status))
        continue
    print("%4d/%d      RAW science: %s downloaded  "  % (i_raw+1, n_results, filepath))
print('... download complete')

## ... and the corresponding raw calibrations and nightlogs

In [None]:
# Datalink semantics identifying, for each raw science file, the link to its
# calibration selector (in the requested mode) and the link to its nightlog.
mode_requested = 'raw2raw'
semantics = 'http://archive.eso.org/rdf/datalink/eso#calSelector_' + mode_requested
semantics_nl = 'http://archive.eso.org/rdf/datalink/eso#night_log'

# The authenticated session exists only when a username was given; build the
# optional keyword argument once instead of branching at every call.
auth_kwargs = {'session': session} if username else {}

# Calibration URLs collected over all science files, without duplicates.
uniqe_calib_urls = Table(names=('access_url', 'eso_category'), dtype=('object', 'object'))

for ii, result in enumerate(results):
    print('%4d/%d   Processing raw science file %s for calibrations' % (ii+1, len(results), result['dp_id']))
    datalink_url = result['datalink_url']
    datalink = pyvo.dal.adhoc.DatalinkResults.from_result_url(datalink_url, **auth_kwargs)

    # next(..., None): a missing link is reported for this file only, instead
    # of raising StopIteration and aborting the whole loop.
    calsel_link = next(datalink.bysemantics(semantics), None)
    nl_link = next(datalink.bysemantics(semantics_nl), None)

    if calsel_link is None:
        print("\x1b[31m       ... no calibration selector link found, calibrations skipped\x1b[0m")
    else:
        calsel_url = calsel_link.access_url
        associated_calib_files = pyvo.dal.adhoc.DatalinkResults.from_result_url(calsel_url, **auth_kwargs)
        #
        # create and use a mask to get only the #calibration entries,
        # given that other entries, like #this or ...#sibling_raw, could be present:
        calibrator_mask = associated_calib_files['semantics'] == '#calibration'
        calib_urls = associated_calib_files.to_table()[calibrator_mask]['access_url','eso_category']
        uniqe_calib_urls = unique(vstack([uniqe_calib_urls, calib_urls]), keys='access_url')

    # Download the nightlog
    if nl_link is None:
        print("\x1b[31m       ... no nightlog link found\x1b[0m")
        continue
    nl_url = nl_link.access_url
    status, filename = eso.downloadURL(nl_url, dirname=dirname, **auth_kwargs)
    if status == 200:
        print("       ... nightlog downloaded")
    else:
        print("\x1b[31m       ... nightlog NOT DOWNLOADED (http status:%d)\x1b[0m"  % (status))

# Do not download the files that are already on disk
local_calib_files = get_files_by_category(dirname, 'SCIENCE', match_type='not equal')
uniqe_calib_urls = remove_partial_matches(uniqe_calib_urls, 'access_url', local_calib_files)

for i_calib, (url, category) in enumerate(uniqe_calib_urls):
    status, filename = eso.downloadURL(url, dirname=dirname, **auth_kwargs)
    if status == 200:
        print("       ... %4d/%d %s (%s) downloaded"  % (i_calib+1, len(uniqe_calib_urls), filename, category))
    else:
        print("\x1b[31m       ... %4d/%d %s (%s) NOT DOWNLOADED (http status:%d)\x1b[0m"  % (i_calib+1,len(uniqe_calib_urls), filename, category, status))

print('... download done ...')

utilities.decompress_files(dirname)

print('... and so is decompression!')