# Disclaimer

This notebook does not work reliably because the voarchive server
goes down after a certain number of requests.

# Simple Spectral Access protocol

SSAP (SSA) defines a uniform interface to remotely discover
and access one-dimensional spectra. Spectral data access
may involve active transformation of data. SSA also
defines complete metadata to describe the available
datasets. It makes use of VOTable for metadata exchange.

## Architecture

A query is used for data discovery and to negotiate the
details of the static or dynamically created dataset
to be retrieved. SSA can mediate not only dataset
metadata but also the actual dataset itself. Direct access to
data is also provided.

A single service may support multiple operations to perform
various functions. The current interface uses an HTTP GET
request to submit parametrized requests, with responses
being returned as, for example, FITS or VOTable. The defined
operations are the following:

1. A queryData operation returns a VOTable describing
candidate datasets.
2. A getData operation is used to access an individual
dataset.

## Basic Usage

In [None]:
import urllib.request
import urllib.parse
import io
import os
import glob
from functools import partial
from itertools import count
from astropy.io import fits
import matplotlib.pyplot as plt
%matplotlib inline
import xml.etree.ElementTree as ET
import numpy as np

In [None]:
def make_ssap_url(
    band,
    request='queryData',
    file_format='votable',
    maxrec=10 ** 6,
    url_parameters=None,
    url='http://voarchive.asu.cas.cz/ccd700/q/ssa/ssap.xml',
):
    """Build the URL of an SSAP request.

    Parameters
    ----------
    band
        Value of the ``BAND`` query parameter; it is converted with ``str``.
    request
        Value of the ``REQUEST`` query parameter.
    file_format
        Value of the ``FORMAT`` query parameter.
    maxrec
        Value of the ``MAXREC`` query parameter.
    url_parameters
        Optional mapping of additional query parameters. It is copied, so
        the caller's mapping is never modified. The four parameters above
        override entries with the same key.
    url
        Service endpoint the query string is appended to.

    Returns
    -------
    str
        ``url`` followed by ``?`` and the URL-encoded query string.
    """
    # Work on a private copy: a mutable default (or the caller's dict)
    # must not be changed as a side effect of building a URL.
    query = dict(url_parameters) if url_parameters else {}
    query['BAND'] = str(band)
    query['REQUEST'] = request
    query['FORMAT'] = file_format
    query['MAXREC'] = maxrec
    return url + '?' + urllib.parse.urlencode(query)

# Build the query URL for BAND=6562e-10, leaving every other parameter at
# its default. NOTE(review): the band is presumably a wavelength in metres
# (6562 Angstrom) -- confirm against the SSA specification.
ssap_url = make_ssap_url(band=6562e-10)
# Bare expression so that the notebook displays the generated URL.
ssap_url

In [None]:
def request_url(url):
    """Open *url* and return the whole response body.

    The response object is closed before the function returns.
    """
    with urllib.request.urlopen(url) as connection:
        return connection.read()

# Download the response of the SSAP query built above.
ssap_xml = request_url(ssap_url)
# Bare expression so that the notebook displays the type of the payload.
type(ssap_xml)

## XML Parsing

In [None]:
# Parse the downloaded response into an ElementTree element.
root = ET.fromstring(ssap_xml)
# Display the root element together with its tag and its attributes.
root, root.tag, root.attrib

In [None]:
def get_ids(root):
    """Return a generator over the text of child 11 of every table row.

    The rows are located purely by position inside *root*:
    first child (RESOURCE) -> last child (TABLE) -> last child (DATA)
    -> first child (TABLEDATA). The position lookup happens immediately;
    the rows themselves are read lazily while the generator is consumed.

    NOTE(review): index 11 is zero-based, i.e. the twelfth cell of a row;
    that this cell holds the publisher ID is an assumption about the
    service's column layout -- confirm against the VOTable header.
    """
    resource = root[0]
    table = resource[-1]
    data = table[-1]
    rows = data[0]
    return (row[11].text for row in rows)
    
# Collect the IDs extracted from the response into a set (drops duplicates).
spectra_pub_ids = set(get_ids(root))
# Display how many distinct IDs the query returned.
len(spectra_pub_ids)

In [None]:
def make_datalink_url(
    pub_id,
    fluxcalib,
    lambda_min,
    lambda_max,
    file_format='application/fits',
    url_parameters=None,
    url='http://voarchive.asu.cas.cz/ccd700/q/sdl/dlget'
):
    """Build the datalink URL for retrieving one spectrum.

    Parameters
    ----------
    pub_id
        Value of the ``ID`` query parameter.
    fluxcalib
        Value of the ``FLUXCALIB`` query parameter. When falsy (for example
        ``None``), the parameter is left out of the request.
    lambda_min, lambda_max
        Values of ``LAMBDA_MIN`` and ``LAMBDA_MAX``; converted with ``str``.
    file_format
        Value of the ``FORMAT`` query parameter.
    url_parameters
        Optional mapping of additional query parameters. It is copied, so
        the caller's mapping is never modified.
    url
        Service endpoint the query string is appended to.

    Returns
    -------
    str
        ``url`` followed by ``?`` and the URL-encoded query string.
    """
    # Work on a private copy. With a shared mutable default, a FLUXCALIB
    # set by one call would silently leak into every later call that
    # passes a falsy ``fluxcalib``.
    query = dict(url_parameters) if url_parameters else {}
    query['ID'] = pub_id
    if fluxcalib:
        query['FLUXCALIB'] = fluxcalib
    query['LAMBDA_MIN'] = str(lambda_min)
    query['LAMBDA_MAX'] = str(lambda_max)
    query['FORMAT'] = file_format
    return url + '?' + urllib.parse.urlencode(query)

# Example datalink URL for a single spectrum with FLUXCALIB='relative'.
# NOTE(review): the lambda limits are presumably wavelengths in metres
# (6500-6600 Angstrom) -- confirm against the service documentation.
datalink_url = make_datalink_url(
    'ivo://asu.cas.cz/stel/ccd700/sh270028',
    fluxcalib='relative',
    lambda_min=6500e-10,
    lambda_max=6600e-10
)
# Bare expression so that the notebook displays the generated URL.
datalink_url

In [None]:
def ondrejov_fits(fluxcalib=None):
    """Download one fixed spectrum and plot its flux on the current axes.

    The spectrum ``kk150018`` is requested through the datalink service
    with the given ``fluxcalib`` (passed through to ``make_datalink_url``),
    the returned FITS payload is opened from memory, and the ``flux``
    column of the second HDU is plotted against its ``spectral`` column.
    """
    datalink_url = make_datalink_url(
        'ivo://asu.cas.cz/stel/ccd700/kk150018',
        fluxcalib=fluxcalib,
        lambda_min=6260e-10,
        lambda_max=6735e-10
    )
    fits_data = request_url(datalink_url)
    # Open the in-memory FITS file in a context manager so that the HDU
    # list is closed as soon as the data have been handed to matplotlib.
    with fits.open(io.BytesIO(fits_data)) as hdulist:
        spectrum = hdulist[1].data
        plt.plot(spectrum['spectral'], spectrum['flux'])

In [None]:
# Draw the same spectrum in four stacked panels, one per flux calibration
# argument; None means ondrejov_fits is called with its default.
for panel, calibration in enumerate(
    (None, 'normalized', 'relative', 'UNCALIBRATED'), start=1
):
    plt.subplot(4, 1, panel)
    ondrejov_fits(calibration)
plt.tight_layout()

# Intersection with Labeled Dataset

In [None]:
# Names of the labeled spectra, read from a raw file as fixed-width
# 13-character unicode strings (dtype '<U13').
names = np.fromfile('../../preprocessing/labeled_data.np', dtype='<U13')
labeled = set(names)

# Despite its name, this holds the short name (last path segment of the
# ID) of every spectrum returned by the SSAP query.
not_labeled = {pub_id.split('/')[-1] for pub_id in spectra_pub_ids}

# Display both set sizes and the size of their overlap.
len(labeled), len(not_labeled), len(labeled.intersection(not_labeled))

# FITS Download

In [None]:
def download_spectrum(pub_id, n, directory, fluxcalibration, minimum, maximum):
    """Download one spectrum and store it as ``<directory>/<name>.fits``.

    ``name`` is the last ``/``-separated segment of *pub_id*; *n* is only
    used in the progress line printed before the download starts.

    Returns ``None`` after the file has been written. If the request
    raises, the exception is printed and ``name`` is returned instead, so
    the caller can tell which spectra failed.
    """
    name = pub_id.rsplit('/', 1)[-1]
    path = os.path.join(directory, name + '.fits')
    url = make_datalink_url(pub_id, fluxcalibration, minimum, maximum)
    print('{}\tdownloading\t{}'.format(n, name))
    try:
        payload = request_url(url)
    except Exception as error:
        # Best effort: report the failure and carry on with the next one.
        print(error)
        return name
    else:
        with open(path, 'wb') as fits_file:
            fits_file.write(payload)

In [None]:
fits_directory = 'ondrejov-ccd700-6260-6735-normalized'
# Create the output directory unless it already exists. Unlike the shell
# magic `%mkdir ... 2> /dev/null`, this is portable and only tolerates the
# "already exists" case instead of hiding every error.
os.makedirs(fits_directory, exist_ok=True)

# download_spectrum with everything but (pub_id, n) fixed, so it can be
# mapped over the IDs and a running counter.
ondrejov_downloader = partial(
    download_spectrum,
    directory=fits_directory,
    fluxcalibration='normalized',
    minimum=6260e-10,
    maximum=6735e-10,
)

In [None]:
def get_pub_id(path):
    """Return the ID built from the file name of *path*.

    The directory part and the last extension are stripped and the
    remainder is appended to ``ivo://asu.cas.cz/stel/ccd700/``.
    """
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return 'ivo://asu.cas.cz/stel/ccd700/' + stem

# Example: the file stem becomes the last segment of the returned ID.
get_pub_id('ondrejov-ccd700-6260-6735-normalized/uh260033.fits')

In [None]:
# Skip every spectrum whose FITS file is already on disk, so that the cell
# can be re-run to resume an interrupted download.
spectra_pub_ids.difference_update(
    map(get_pub_id, glob.glob('ondrejov-ccd700-6260-6735-normalized/*.fits'))
)
print(len(spectra_pub_ids))
# One entry per attempted download: None on success, the spectrum name on
# failure (see download_spectrum).
donwload_info = [
    ondrejov_downloader(pub_id, n)
    for n, pub_id in enumerate(spectra_pub_ids, start=1)
]