# Fetch data from MAST/SDSS...

## Imports

In [None]:
import sys
import os
import shutil
import json
from bs4 import BeautifulSoup
import requests
import urllib2
from astropy.io import fits
from astropy import coordinates
from astroquery.mast import Observations
from astroquery.sdss import SDSS
from astroquery.simbad import Simbad
root = "/Users/phdenzel/gleam"
sys.path.append(root)
import gleam

## Functions

In [None]:
def fetch_MAST_productlist(name, N,
                           select_keys=None,
                           composite_only=True, science_only=False,
                           verbose=True):
    """
    Query MAST for observations of an object and list their data products

    Args:
        name <str> - object name, e.g. 'PG 1115+080'
        N <int> - maximum number of selected observations to keep; the first
                  N rows of the filtered query result are used

    Kwargs:
        select_keys <dict> - column/value pairs an observation has to match
                             (all of them); None selects public images, i.e.
                             {'dataproduct_type': 'image', 'dataRights': 'PUBLIC'},
                             an empty dict disables the selection
        composite_only <bool> - keep only products whose 'type' is 'C'
        science_only <bool> - keep only products whose 'productType' is 'SCIENCE'
        verbose <bool> - verbose mode; print command line statements

    Return:
        data_products <astropy.table.table.Table object> - table of data products
    """
    if select_keys is None:
        # 'image' is a value of the 'dataproduct_type' column; 'obs_collection'
        # holds mission names (e.g. 'HST') and would never match it
        select_keys = {'dataproduct_type': 'image', 'dataRights': 'PUBLIC'}
    obs = Observations.query_object(name, radius=".001 deg")
    # combine all criteria into a single row mask; the mask stays None when
    # there are no criteria, in which case every observation is kept
    mask = None
    for k, v in select_keys.items():
        matches = obs[k] == v
        mask = matches if mask is None else mask & matches
    selected = obs if mask is None else obs[mask]
    selected = selected[:N]
    data_products = Observations.get_product_list(selected)
    if composite_only:
        data_products = data_products[data_products['type'] == 'C']
    if science_only:
        data_products = data_products[data_products['productType'] == 'SCIENCE']
    if verbose:
        print("# of observations:\t {:4d}".format(len(obs)))
        print("# of selected obs:\t {:4d}".format(len(selected)))
        # one right-aligned observation id per line
        print("".join("{:>20}\n".format(obs_id) for obs_id in selected['obs_id']))
        print('# of products:    \t {:4d}'.format(len(data_products)))
    return data_products


def postfilter_productlist(data_products, extension_keys=None):
    """
    Remove the data products whose fields end with unwanted suffixes

    Args:
        data_products <astropy.table.table.Table object> - table of data products

    Kwargs:
        extension_keys <dict> - extension criteria which to filter out; maps a
                                column name to the suffix (or tuple of suffixes)
                                whose rows are removed; None or {} removes nothing

    Return:
        data_products <astropy.table.table.Table object> - the remaining products;
            the input object itself if no row matched any criterion
    """
    if not extension_keys:
        return data_products
    for k, v in extension_keys.items():
        keep = [not p[k].endswith(v) for p in data_products]
        # only index the table if at least one row is actually dropped
        if not all(keep):
            data_products = data_products[keep]
    return data_products

## Load from list of lenses

### Paraficz table (https://arxiv.org/abs/1002.2570)

In [None]:
# read the list of lenses from the JSON file of the table
table_path = 'table_1002.2570.json'
with open(table_path) as json_file:
    delay_qsos = json.load(json_file)

#### From MAST

In [None]:
# index of the lens (entry of delay_qsos) to process
idx = 1

# query settings
N = 20  # number of selected observations to keep
select_keys = dict(
    obs_collection='HST',
    # dataproduct_type='image',
    # wavelength_region='Optical',
    dataRights='PUBLIC',
)
# products whose file name ends with this suffix are filtered out
extnsn_keys = dict(productFilename='jpg')

In [None]:
# fetch data from MAST for the selected lens
system_name = delay_qsos[idx]["System"]
print(system_name)
# switch: resolve the object name with SIMBAD (True) or fall back to the
# table's short name (False)
use_simbad = True
if use_simbad:
    query = Simbad.query_object(system_name)
    if query:
        name = query["MAIN_ID"][0]
    else:
        # keep the table's name when SIMBAD returns nothing usable
        name = system_name
else:
    name = delay_qsos[idx]["shortname"]
print(name)
# list the products, then drop the unwanted file extensions
data_prods = fetch_MAST_productlist(name, N, composite_only=True,
                                    select_keys=select_keys)
filt_prods = postfilter_productlist(data_prods, extension_keys=extnsn_keys)
print('# of filtered products:\t {:4d}'.format(len(filt_prods)))
# data_prods.show_in_browser()

In [None]:
# download and save data
# the unfiltered product list (data_prods) is passed here, not the
# post-filtered filt_prods computed in the previous cell
# NOTE(review): confirm that downloading the unfiltered list is intended
manifest = Observations.download_products(data_prods)

In [None]:
# copy the downloaded files into a directory named after the object
# (object name without spaces)
savedir = name.replace(" ", "")
gleam.utils.makedir.mkdir_p(savedir)
# every file ends up in <savedir>/<name of its parent directory>/;
# the loop variable is deliberately not called `root`, which already holds
# the path of the gleam package
for dirpath, _dirs, filenames in os.walk("mastDownload"):
    if not filenames:
        continue
    target_dir = os.path.join(savedir, os.path.basename(dirpath))
    gleam.utils.makedir.mkdir_p(target_dir)
    for f in filenames:
        shutil.copy2(os.path.join(dirpath, f), os.path.join(target_dir, f))
# delete mastDownload
shutil.rmtree("mastDownload")

#### From SDSS

In [None]:
# look up the selected system's sky position by name and query SDSS
# (with spectro=True) at that position
lens = delay_qsos[idx]
name = lens["System"]
# name = lens["shortname"]
print(name)
pos = coordinates.SkyCoord.from_name(name)
print(pos)
xid = SDSS.query_region(pos, spectro=True)
print(xid)
# xid.show_in_browser()

In [None]:
# download and save data
# fetches the SDSS images for the matches in xid and prints the result;
# this cell contains no explicit write to disk
im = SDSS.get_images(matches=xid)
print(im)

### Lensed quasar database table (https://www.ast.cam.ac.uk/ioa/research/lensedquasars/index.html)

In [None]:
# open the lensed quasar catalogue and keep its HDU at index 1
catalogue = fits.open("lensedquasars.fits")
qsos = catalogue[1]

#### Webcrawler for automated download

In [None]:
# crawl the individual web page of every catalogue entry and download all
# linked FITS files / archives into lensedquasars/<name without spaces>/
fits_suffixes = ('.fits', '.fits.gz', '.tar.gz')
for d in qsos.data:
    name = d[2]
    # use webcrawler to download from https://www.ast.cam.ac.uk/ioa/research/lensedquasars/indiv/[name]
    website = "https://www.ast.cam.ac.uk/ioa/research/lensedquasars/indiv/{}.html".format(name)
    r = requests.get(website)
    soup = BeautifulSoup(r.text, features="html5lib")
    fitslink = []
    for search in soup.find_all('a'):
        link = search.get('href')
        # anchors without an href attribute yield None and are skipped
        if link and link.endswith(fits_suffixes):
            fitslink.append(link)
    savedir = "lensedquasars/{}".format(name.replace(" ", ""))
    gleam.utils.makedir.mkdir_p(savedir)
    # NOTE(review): the hrefs are requested as found on the page; relative
    # links would first have to be joined with the page URL
    for fl in fitslink:
        print(fl)
        download = requests.get(fl)
        # fail on HTTP errors instead of saving an error page as data
        download.raise_for_status()
        filename = os.path.basename(fl)
        with open(os.path.join(savedir, filename), 'wb') as f:
            f.write(download.content)