In [1]:
import os
import re
from io import StringIO
from urllib.parse import quote
from urllib.request import urlopen, urlretrieve  # python3 doesn't have urllib2

import bs4  # make sure to do `conda install beautifulsoup4` in starkit environment
import numpy as np
import pandas as pd  # make sure to `pip install lxml` - needed for pd.read_html()
from tqdm import tqdm_notebook

# Ingesting the Index #

In [2]:
# Base URL of the SVO filter pages scraped below; the relative
# `index.php?...` links found on those pages are joined onto it.
SVO_BASE_URL = 'http://svo2.cab.inta-csic.es/svo/theory/fps'

In [3]:
def generate_facilities_list():
    """Scrape the SVO landing page for the list of observing facilities.

    Returns
    -------
    pandas.DataFrame
        One row per ``<a>`` tag whose ``href`` contains ``gname``, with the
        columns ``facility`` (the link text) and ``url`` (the link's ``href``
        exactly as it appears in the page).
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(SVO_BASE_URL) as response:
        svo_html = response.read()
    # Name the parser explicitly: without it bs4 warns and silently picks
    # whichever parser happens to be installed. lxml is already a stated
    # requirement of this notebook (see the import cell).
    svo_soup = bs4.BeautifulSoup(svo_html, 'lxml')
    observatory_links = svo_soup.find_all('a', attrs={'href': re.compile('gname')})
    # Build the frame in one go rather than assigning row by row with .loc.
    records = [(obs_link.text, obs_link.attrs['href']) for obs_link in observatory_links]
    return pd.DataFrame(records, columns=['facility', 'url'])

def generate_instruments_list(observatory_url):
    """Return the instrument links found on a facility page.

    Parameters
    ----------
    observatory_url : str
        Absolute URL of the page to fetch.

    Returns
    -------
    list-like of bs4 tags
        Every ``<a>`` tag whose ``href`` matches the
        ``index.php?mode=browse&gname=...&gname2=...`` pattern. The result is
        empty when the page has no such links.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(observatory_url) as response:
        observatory_html = response.read()
    # Explicit parser: avoids bs4's "no parser specified" warning and keeps
    # the parse independent of which optional parsers are installed.
    observatory_soup = bs4.BeautifulSoup(observatory_html, 'lxml')
    # Raw string so that `\.` and `\?` are not treated as (invalid) string
    # escapes; the compiled pattern is unchanged.
    instrument_href = re.compile(r'index\.php\?mode=browse&gname=.+?&gname2=.+?')
    return observatory_soup.find_all('a', attrs={'href': instrument_href})

def generate_filter_list(observatory_url, table_index=8):
    """Fetch a facility/instrument page and parse its filter table.

    Parameters
    ----------
    observatory_url : str
        Absolute URL of the page to fetch.
    table_index : int, optional
        Position of the filter table among all ``<table>`` elements of the
        page. Defaults to 8, the position this notebook has always used.

    Returns
    -------
    pandas.DataFrame
        The selected table, with its first row used as the header.

    Raises
    ------
    IndexError
        If the page contains too few ``<table>`` elements.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(observatory_url) as response:
        observatory_html = response.read()
    # Explicit parser: avoids bs4's "no parser specified" warning and keeps
    # the table count independent of which optional parsers are installed.
    observatory_soup = bs4.BeautifulSoup(observatory_html, 'lxml')
    tables = observatory_soup.find_all('table')
    if len(tables) <= table_index:
        raise IndexError(
            "Expected more than {0} <table> elements at {1}, found {2}".format(
                table_index, observatory_url, len(tables)))
    # read_html expects a file-like object; passing literal HTML as a plain
    # string is deprecated in recent pandas releases.
    filter_table = pd.read_html(StringIO(str(tables[table_index])), header=0)[0]
    return filter_table

In [4]:
# Scrape the facility index once; the loop in the next cell iterates over it.
facilities_list = generate_facilities_list()
facilities_list

Unnamed: 0,facility,url
0,2MASS,index.php?mode=browse&gname=2MASS
1,AAO,index.php?mode=browse&gname=AAO
2,AKARI,index.php?mode=browse&gname=AKARI
3,Astrosat,index.php?mode=browse&gname=Astrosat
4,BOK,index.php?mode=browse&gname=BOK
5,CAHA,index.php?mode=browse&gname=CAHA
6,CFHT,index.php?mode=browse&gname=CFHT
7,COBE,index.php?mode=browse&gname=COBE
8,CTIO,index.php?mode=browse&gname=CTIO
9,DENIS,index.php?mode=browse&gname=DENIS


In [5]:
# Scrape one filter table per facility page (or per instrument page when the
# facility lists separate instruments), tag each table with where it came
# from, and concatenate everything once at the end.
filter_tables = []
for _, facility in tqdm_notebook(facilities_list.iterrows(), desc='Facilities', total=len(facilities_list)):
    print("Working on {0}".format(facility.facility))
    # Guard against a stray u'\xc2' in the scraped name (presumably a sign of
    # mis-decoded text - TODO confirm); fail with a readable error.
    if u'\xc2' in facility.facility:
        raise ValueError("Unexpected character u'\\xc2' in facility name {0!r}".format(facility.facility))

    # URLs are joined with '/', not os.path.join (which uses '\\' on Windows).
    # urljoin is unsuitable too: SVO_BASE_URL has no trailing slash, so it
    # would replace the final path segment.
    facility_url = '{0}/{1}'.format(SVO_BASE_URL, facility.url)
    instruments = generate_instruments_list(facility_url)

    if len(instruments) == 0:
        # No separate instruments: the facility page itself holds the table.
        pages = [('NA', facility_url)]
    else:
        pages = tqdm_notebook(
            [(instrument.text, '{0}/{1}'.format(SVO_BASE_URL, instrument.attrs['href']))
             for instrument in instruments],
            desc='Instruments', leave=False)

    for instrument_name, page_url in pages:
        filter_list = generate_filter_list(page_url)
        filter_list['Obs. Facility'] = facility.facility
        filter_list['Instrument'] = instrument_name
        filter_tables.append(filter_list)

# A single concat replaces repeated DataFrame.append, which copies the whole
# accumulated frame on every iteration and no longer exists in pandas 2.
# The per-table indices are kept, as before; they are reset in a later cell.
all_filters = pd.concat(filter_tables, sort=False) if filter_tables else None

HBox(children=(IntProgress(value=0, description='Facilities', max=78, style=ProgressStyle(description_width='i…

Working on 2MASS
Working on AAO
Working on AKARI


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on Astrosat
Working on BOK


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on CAHA


HBox(children=(IntProgress(value=0, description='Instruments', max=6, style=ProgressStyle(description_width='i…

Working on CFHT


HBox(children=(IntProgress(value=0, description='Instruments', max=7, style=ProgressStyle(description_width='i…

Working on COBE
Working on CTIO


HBox(children=(IntProgress(value=0, description='Instruments', max=9, style=ProgressStyle(description_width='i…

Working on DENIS
Working on Euclid
Working on GAIA


HBox(children=(IntProgress(value=0, description='Instruments', max=3, style=ProgressStyle(description_width='i…

Working on GALEX
Working on GCPD


HBox(children=(IntProgress(value=0, description='Instruments', max=12, style=ProgressStyle(description_width='…

Working on Gemini


HBox(children=(IntProgress(value=0, description='Instruments', max=13, style=ProgressStyle(description_width='…

Working on Generic


HBox(children=(IntProgress(value=0, description='Instruments', max=6, style=ProgressStyle(description_width='i…

Working on Geneva
Working on GTC


HBox(children=(IntProgress(value=0, description='Instruments', max=3, style=ProgressStyle(description_width='i…

Working on Herschel


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on Hipparcos
Working on HST


HBox(children=(IntProgress(value=0, description='Instruments', max=10, style=ProgressStyle(description_width='…

Working on IAC80


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on ING
Working on INT


HBox(children=(IntProgress(value=0, description='Instruments', max=3, style=ProgressStyle(description_width='i…

Working on IRAS
Working on ISO


HBox(children=(IntProgress(value=0, description='Instruments', max=4, style=ProgressStyle(description_width='i…

Working on IUE
Working on JWST


HBox(children=(IntProgress(value=0, description='Instruments', max=3, style=ProgressStyle(description_width='i…

Working on Keck


HBox(children=(IntProgress(value=0, description='Instruments', max=3, style=ProgressStyle(description_width='i…

Working on Kepler
Working on KPNO


HBox(children=(IntProgress(value=0, description='Instruments', max=4, style=ProgressStyle(description_width='i…

Working on LasCumbres
Working on LaSilla


HBox(children=(IntProgress(value=0, description='Instruments', max=5, style=ProgressStyle(description_width='i…

Working on LBT


HBox(children=(IntProgress(value=0, description='Instruments', max=4, style=ProgressStyle(description_width='i…

Working on LCO


HBox(children=(IntProgress(value=0, description='Instruments', max=9, style=ProgressStyle(description_width='i…

Working on LICK
Working on Liverpool


HBox(children=(IntProgress(value=0, description='Instruments', max=4, style=ProgressStyle(description_width='i…

Working on LSST
Working on McD
Working on Misc


HBox(children=(IntProgress(value=0, description='Instruments', max=6, style=ProgressStyle(description_width='i…

Working on MKO


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on MMT


HBox(children=(IntProgress(value=0, description='Instruments', max=5, style=ProgressStyle(description_width='i…

Working on MSX
Working on NAOC


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on NIRT
Working on NOAO
Working on NOT


HBox(children=(IntProgress(value=0, description='Instruments', max=4, style=ProgressStyle(description_width='i…

Working on OAF
Working on OAJ
Working on OSN


HBox(children=(IntProgress(value=0, description='Instruments', max=6, style=ProgressStyle(description_width='i…

Working on P200


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on Palomar
Working on PAN-STARRS
Working on Paranal


HBox(children=(IntProgress(value=0, description='Instruments', max=10, style=ProgressStyle(description_width='…

Working on SAO
Working on Scorpio


HBox(children=(IntProgress(value=0, description='Instruments', max=4, style=ProgressStyle(description_width='i…

Working on SkyMapper
Working on SLOAN
Working on SOFIA


HBox(children=(IntProgress(value=0, description='Instruments', max=5, style=ProgressStyle(description_width='i…

Working on Special
Working on Spitzer


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on STELLA
Working on Subaru


HBox(children=(IntProgress(value=0, description='Instruments', max=7, style=ProgressStyle(description_width='i…

Working on Swift
Working on TCS
Working on TD1


HBox(children=(IntProgress(value=0, description='Instruments', max=5, style=ProgressStyle(description_width='i…

Working on TESS
Working on TJO
Working on TNG


HBox(children=(IntProgress(value=0, description='Instruments', max=4, style=ProgressStyle(description_width='i…

Working on TNO
Working on TYCHO
Working on UKIRT


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on VATT
Working on WFIRST
Working on WHT


HBox(children=(IntProgress(value=0, description='Instruments', max=3, style=ProgressStyle(description_width='i…

Working on WISE
Working on WIYN


HBox(children=(IntProgress(value=0, description='Instruments', max=2, style=ProgressStyle(description_width='i…

Working on XMM



In [6]:
# The per-page tables were stacked with their own 0..n indices; replace the
# repeated labels with one continuous index (old index discarded).
all_filters.reset_index(inplace=True, drop=True)

In [7]:
# Rename columns 1-4 by replacing their first character with 'lambda_'
# (the saved index shows lambda_mean, lambda_eff, lambda_min, lambda_max).
# Names that already carry the prefix are left alone, so re-running this cell
# no longer produces 'lambda_ambda_mean'.
columns = all_filters.columns.tolist()
columns[1:5] = [
    name if name.startswith('lambda_') else 'lambda_' + name[1:]
    for name in columns[1:5]
]
all_filters.columns = columns

In [8]:
# Persist the scraped index so the later sections can start from disk.
# `key` is passed by keyword: recent pandas releases deprecate passing it
# positionally.
all_filters.to_hdf('svo_filter_index.h5', key='all_filters')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->['Filter ID', 'Obs. Facility', 'Instrument', 'Description']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)


In [9]:
# Spot-check: show only the filters scraped for the 'Geneva' facility.
geneva_rows = all_filters['Obs. Facility'] == 'Geneva'
all_filters.loc[geneva_rows]

Unnamed: 0,Filter ID,lambda_mean,lambda_eff,lambda_min,lambda_max,Weff,ZP (Jy),Obs. Facility,Instrument,Description
1589,Geneva/Geneva.U,3459.1,3460.5,3006,3892,484.9,1307.8,Geneva,,Geneva U
1590,Geneva/Geneva.B1,4022.8,4036.4,3604,4669,422.1,3699.4,Geneva,,Geneva B1
1591,Geneva/Geneva.B,4248.1,4235.3,3609,5102,787.9,3953.8,Geneva,,Geneva B
1592,Geneva/Geneva.B2,4480.7,4467.5,4023,5012,437.8,4177.5,Geneva,,Geneva B2
1593,Geneva/Geneva.V2,5409.5,5387.3,5003,6105,498.6,3654.4,Geneva,,Geneva V2
1594,Geneva/Geneva.V,5508.5,5462.1,4910,6502,749.7,3591.8,Geneva,,Geneva V
1595,Geneva/Geneva.G,5814.6,5792.8,5404,6547,476.1,3407.6,Geneva,,Geneva G


# Downloading the transmission curves #

In [10]:
# Reload the filter index from 'svo_filter_index.h5' (written by an earlier
# cell) so the download loop below works from the on-disk copy.
tab = pd.read_hdf('svo_filter_index.h5')

In [11]:
ls SVO/

[0m[01;34m2MASS[0m/     [01;34mGALEX[0m/      [01;34mIRAS[0m/        [01;34mLiverpool[0m/  [01;34mOAJ[0m/         [01;34mSpitzer[0m/  [01;34mVATT[0m/
[01;34mAAO[0m/       [01;34mGCPD[0m/       [01;34mISO[0m/         [01;34mLSST[0m/       [01;34mOSN[0m/         [01;34mSTELLA[0m/   [01;34mWFIRST[0m/
[01;34mAKARI[0m/     [01;34mGemini[0m/     [01;34mIUE[0m/         [01;34mMcD[0m/        [01;34mP200[0m/        [01;34mSubaru[0m/   [01;34mWHT[0m/
[01;34mAstrosat[0m/  [01;34mGeneric[0m/    [01;34mJWST[0m/        [01;34mMisc[0m/       [01;34mPalomar[0m/     [01;34mSwift[0m/    [01;34mWISE[0m/
[01;34mBOK[0m/       [01;34mGeneva[0m/     [01;34mKeck[0m/        [01;34mMKO[0m/        [01;34mPAN-STARRS[0m/  [01;34mTCS[0m/      [01;34mWIYN[0m/
[01;34mCAHA[0m/      [01;34mGTC[0m/        [01;34mKepler[0m/      [01;34mMMT[0m/        [01;34mParanal[0m/     [01;34mTD1[0m/      [01;34mXMM[0m/
[01;34mCFHT[0m/      

In [12]:
# Download every transmission curve listed in `tab` into SVO/<facility>/,
# skipping files that already exist so the cell can be re-run cheaply.
for _, filter_data in tqdm_notebook(tab.iterrows(), total=len(tab)):
    filt_id = filter_data['Filter ID']
    facility = filter_data['Obs. Facility']
    facility_dir = os.path.join('SVO', facility)
    # exist_ok avoids the separate exists() check and its race.
    os.makedirs(facility_dir, exist_ok=True)
    filename = filt_id.split('/')[-1] + '.dat'  # so that filter_names (J,C,H,G,R,etc.) not treated as extensions
    outfile = os.path.join(facility_dir, filename)
    if os.path.exists(outfile):
        # NOTE: files cached by an earlier, unquoted request (ids containing
        # '+') must be deleted by hand before they will be fetched again.
        continue
    # Percent-encode the id: a literal '+' in a query string is decoded as a
    # space, so ids such as 'Scorpio/Comet.CO+' would request the wrong filter.
    # '/' is kept as-is.
    url = SVO_BASE_URL + '/getdata.php?format=ascii&id=' + quote(filt_id, safe='/')
    print(outfile)
    urlretrieve(url, outfile)


HBox(children=(IntProgress(value=0, max=4519), HTML(value='')))




# Ingesting the transmission curves into wsynphot format #

In [13]:
# Reload the filter index from disk; the ingest loop below adds a
# 'wsynphot_filter_id' column to this frame.
filter_index = pd.read_hdf('svo_filter_index.h5')
filter_index

Unnamed: 0,Filter ID,lambda_mean,lambda_eff,lambda_min,lambda_max,Weff,ZP (Jy),Obs. Facility,Instrument,Description
0,2MASS/2MASS.J,12350.0,12350.0,10806,14068,1624.1,1594.0,2MASS,,2MASS J
1,2MASS/2MASS.H,16620.0,16620.0,14787,18231,2509.4,1024.0,2MASS,,2MASS H
2,2MASS/2MASS.Ks,21590.0,21590.0,19544,23552,2618.9,666.8,2MASS,,2MASS Ks
3,AAO/AAO.aao21,3580.6,3634.3,3014,4172,642.6,1717.2,AAO,,"AAO #21, aao cus04, U"
4,AAO/AAO.aao48,3638.4,3692.7,3113,4168,546.0,1844.3,AAO,,"AAO #48, u 48, U"
5,AAO/AAO.aao70,3758.9,3770.8,3643,3871,164.1,2007.2,AAO,,"AAO #70, ttf 375.19, B0"
6,AAO/AAO.aao71,3904.0,3905.9,3786,4026,167.9,3381.7,AAO,,"AAO #71, ttf 390.17, B1"
7,AAO/AAO.aao72,4123.8,4125.3,3989,4273,140.6,4085.3,AAO,,"AAO #72, ttf 411.15, B2"
8,AAO/AAO.aao49,4279.5,4267.8,3533,5234,963.0,3869.6,AAO,,"AAO #49, b 49, B"
9,AAO/AAO.aao1,4384.2,4351.7,3539,5764,1035.6,3926.3,AAO,,"AAO #1, aao glass b, B"


In [14]:
# Utility function for cleaning data
def is_float(str1, str2):
    """Check whether both values can be converted with ``float()``.

    Parameters
    ----------
    str1, str2 : object
        Values to test; typically strings read from a transmission file.

    Returns
    -------
    bool
        True only if ``float()`` accepts both values. Values that ``float()``
        rejects with ``ValueError`` (e.g. ``'abc'``) or ``TypeError``
        (e.g. ``None``) count as not-a-float instead of raising.
    """
    def _converts(value):
        # One shared conversion check instead of two copy-pasted try blocks.
        try:
            float(value)
        except (TypeError, ValueError):
            return False
        return True

    return _converts(str1) and _converts(str2)

In [15]:
# Read every downloaded transmission curve, clean it, and store it in
# 'filter_data.h5' under a wsynphot-style key. The key is also recorded in
# filter_index['wsynphot_filter_id'] (NaN for filters with no usable data).
with pd.HDFStore('filter_data.h5', 'w') as fh:
    for i, filter_data in tqdm_notebook(filter_index.iterrows(), total=len(filter_index)):
        fname = os.path.join('SVO', filter_data['Filter ID'] + '.dat')
        # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
        # which is deprecated in recent pandas releases.
        transmission_data = pd.read_csv(fname, names=['wavelength', 'transmission_lambda'],
                                        sep=r'\s+', comment='#')

        # Cleaning filter_index from filters with empty transmission_data -------
        if transmission_data.empty:
            print(fname + " is empty")
            filter_index.loc[i, 'wsynphot_filter_id'] = np.nan  # leave wsynphot_filter_id column NaN in filter_index
            continue  # and skip ingesting transmission_data in hdf file

        # Cleaning transmission_data from garbage strings (by making sure dtype is float only) --------
        if (transmission_data.wavelength.dtype == np.dtype(object) or
                transmission_data.transmission_lambda.dtype == np.dtype(object)):
            print(fname + " contains bad strings")
            # Keep only the rows in which both columns parse as floats.
            cleaningIndex = transmission_data.apply(lambda row : is_float(row['wavelength'],row['transmission_lambda']),
                                                  axis = 1)
            transmission_data = transmission_data[cleaningIndex].astype(float)
            transmission_data.reset_index(drop=True,inplace=True)

        # Ingesting the transmission_data in wsynphot format -------
        # 'Facility/Instrument.Filter' -> 'Facility/Instrument/Filter', '-' -> '_'
        wsynphot_filter_id = filter_data['Filter ID'].replace('.', '/').replace('-', '_')
        filter_index.loc[i, 'wsynphot_filter_id'] = wsynphot_filter_id
        fh[wsynphot_filter_id] = transmission_data

HBox(children=(IntProgress(value=0, max=4519), HTML(value='')))































SVO/CTIO/MosaicII.Halpha+80.dat is empty


















SVO/INT/IPHAS.gR.dat contains bad strings
SVO/INT/IPHAS.Ha.dat contains bad strings
SVO/INT/IPHAS.gI.dat contains bad strings






SVO/KPNO/Mosaic.Halpha+40.dat is empty
SVO/KPNO/Mosaic.Halpha+80.dat is empty
SVO/KPNO/Mosaic.Halpha+120.dat is empty
SVO/KPNO/Mosaic.Halpha+160.dat is empty














SVO/Scorpio/Comet.CO+.dat is empty









In [16]:
# Re-open the curve store in append mode; this handle stays open until the
# explicit fh.close() in a later cell.
fh = pd.HDFStore('filter_data.h5', 'a')
# Inspect the Scorpio/Comet rows, one of which was reported empty above.
filter_index[filter_index['Filter ID'].str.contains('Scorpio/Comet')]

Unnamed: 0,Filter ID,lambda_mean,lambda_eff,lambda_min,lambda_max,Weff,ZP (Jy),Obs. Facility,Instrument,Description,wsynphot_filter_id
3917,Scorpio/Comet.CN_3980,3983.0,3985.5,3925,4048,38.4,3500.4,Scorpio,Comet,SCORPIO CN filter for Comet molecular bands,Scorpio/Comet/CN_3980
3918,Scorpio/Comet.CO+,4334.2,4332.2,4268,4400,40.3,3118.9,Scorpio,Comet,SCORPIO CO+ filter for Comet molecular bands,
3919,Scorpio/Comet.Cont_4470,4476.4,4476.2,4414,4530,36.5,4332.3,Scorpio,Comet,SCORPIO Cont_4470 filter for Comet molecular b...,Scorpio/Comet/Cont_4470
3920,Scorpio/Comet.C2_5125,5125.3,5124.1,4965,5252,122.4,3862.3,Scorpio,Comet,SCORPIO C2 filter for Comet molecular bands,Scorpio/Comet/C2_5125
3921,Scorpio/Comet.Cont_6840,6842.1,6841.8,6758,6937,82.3,2880.4,Scorpio,Comet,SCORPIO Cont_6840 filter for Comet molecular b...,Scorpio/Comet/Cont_6840


In [17]:
# Ingest filter_index after removing rows containing wsynphot_filter_id = NaN
filter_index = filter_index.dropna().reset_index(drop=True)
fh['index'] = filter_index
filter_index[filter_index['Filter ID'].str.contains('Scorpio/Comet')]

Unnamed: 0,Filter ID,lambda_mean,lambda_eff,lambda_min,lambda_max,Weff,ZP (Jy),Obs. Facility,Instrument,Description,wsynphot_filter_id
3911,Scorpio/Comet.CN_3980,3983.0,3985.5,3925,4048,38.4,3500.4,Scorpio,Comet,SCORPIO CN filter for Comet molecular bands,Scorpio/Comet/CN_3980
3912,Scorpio/Comet.Cont_4470,4476.4,4476.2,4414,4530,36.5,4332.3,Scorpio,Comet,SCORPIO Cont_4470 filter for Comet molecular b...,Scorpio/Comet/Cont_4470
3913,Scorpio/Comet.C2_5125,5125.3,5124.1,4965,5252,122.4,3862.3,Scorpio,Comet,SCORPIO C2 filter for Comet molecular bands,Scorpio/Comet/C2_5125
3914,Scorpio/Comet.Cont_6840,6842.1,6841.8,6758,6937,82.3,2880.4,Scorpio,Comet,SCORPIO Cont_6840 filter for Comet molecular b...,Scorpio/Comet/Cont_6840


In [18]:
# Close the HDF store opened in an earlier cell so the file is flushed and released.
fh.close()

In [19]:
import wsynphot
from astropy import units as u



In [20]:
# Load one filter through wsynphot using a key in the same
# Facility/Instrument/Filter form as the wsynphot_filter_id values above
# (presumably a smoke test of the ingested data - TODO confirm which file
# wsynphot reads from).
x = wsynphot.FilterCurve.load_filter('Keck/NIRC2/Kp')

In [21]:
# Display the loaded filter's `zp_ab_f_nu` attribute.
x.zp_ab_f_nu



<Quantity 3.631e-20 erg / (cm2 Hz s)>

In [22]:
# Look up the index rows whose Filter ID begins with 'HST/WFC3_UVIS1.FQ619'.
fq619_rows = filter_index['Filter ID'].str.startswith('HST/WFC3_UVIS1.FQ619')
filter_index.loc[fq619_rows]

Unnamed: 0,Filter ID,lambda_mean,lambda_eff,lambda_min,lambda_max,Weff,ZP (Jy),Obs. Facility,Instrument,Description,wsynphot_filter_id
1826,HST/WFC3_UVIS1.FQ619N,6198.4,6198.2,6147,6253,60.9,3195.7,HST,WFC3_UVIS1,"HST, WFC3, UVIS1, FQ619N, CH4 6194",HST/WFC3_UVIS1/FQ619N
