In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.io import fits
import glob

___
___
___

In [2]:
# Pantheon parameters file (whitespace-delimited table, one row per SN)
params_file = 'test_files/Pantheon/hlsp_ps1cosmo_panstarrs_gpc1_all_model_v1_ancillary-g10.fitres'
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True
pantheon_params = pd.read_csv(params_file, sep=r'\s+')

# Split the table into sub-samples by row position (236 / 335 / 172 / 279 / 26 rows).
# NOTE(review): the boundaries are hard-coded and assume the row order of this
# particular file — confirm them if the parameters file is ever replaced.
snls = pantheon_params.iloc[0:236]
sdss = pantheon_params.iloc[236:571]
lowz = pantheon_params.iloc[571:743]
ps1 = pantheon_params.iloc[743:1022]
hst = pantheon_params.iloc[1022:1048]

pantheon_params.head(3)

Unnamed: 0,VARNAMES:,CID,CIDint,IDSURVEY,TYPE,FIELD,CUTFLAG_SNANA,zCMB,zCMBERR,zHD,...,MURES,MUPULL,ERRCODE,biasCor_mu,biasCorErr_mu,biasCor_mB,biasCor_x1,biasCor_c,biasScale_muCOV,IDSAMPLE
0,SN:,03D1au,1,4,0,D1,1,0.50349,0.0,0.50309,...,-0.013,-0.108,0,0.016,0.006,0.003,0.133,0.003,0.906,3
1,SN:,03D1ax,2,4,0,D1,1,0.4952,0.0,0.4948,...,-0.033,-0.283,0,0.022,0.009,-0.011,-0.052,-0.014,0.935,3
2,SN:,03D1co,3,4,0,D1,1,0.6782,0.0,0.67767,...,0.336,1.705,0,-0.029,0.005,-0.023,-0.011,0.001,0.788,3


## Low-z (172 SNe)

Low-z survey IDs (IDSURVEY): 5, 61, 62, 63, 64, 65, 66 (CSP, CFA1, CFA2, CFA3S[4shooter2], CFA3K[keplercam], CFA4p1, CFA4p2)

Several SNe have data from 2 surveys (either CSP and CFA3K or CSP and CFA4p1). I choose the file according to the survey ID in the Pantheon parameters file (IDSURVEY) to avoid any issue with similar bands so close to each other.

These are all in the Vega magnitude system (I compared them with the JLA light curves). For CFA4 (p1 and p2) I am not sure, as these are not included in JLA, but I guess they should also be in the Vega system.

In [5]:
# Build two parallel lists: the light-curve file chosen for each low-z SN and the SN name.
lowz_files = []
sn_list = []
sample_directory = 'test_files/Pantheon/lightcurves/Pantheon_LOWZ_TEXT/*'

# Substring expected in the file name for each survey ID that can share a SN with another survey
survey_tags = {5: 'CSP', 64: 'CFA3', 65: 'CFA4'}

# List the directory once instead of once per SN; sorted because glob returns
# matches in arbitrary order and the first match is the one that gets used.
candidate_files = sorted(glob.glob(sample_directory))

for name, idsurvey in zip(lowz.CID, lowz.IDSURVEY):

    # NOTE(review): substring match on the whole path — a name that is a prefix of
    # another SN name would match both files; confirm against the file naming scheme.
    files = [path for path in candidate_files if name in path]
    if not files:
        raise FileNotFoundError(f'no light-curve file found for {name} in {sample_directory}')

    if len(files) > 1:
        # pick only the file with the corresponding survey
        if idsurvey not in survey_tags:
            # Previously this branch fell through and appended the file chosen for the
            # previous SN; skip the SN instead so both lists stay aligned and correct.
            print('survey ID', idsurvey, 'not found')
            continue
        tag = survey_tags[idsurvey]
        tagged_files = [path for path in files if tag in path]
        if not tagged_files:
            raise FileNotFoundError(f'no {tag} light-curve file found for {name}')
        chosen_file = tagged_files[0]
    else:
        chosen_file = files[0]

    lowz_files.append(chosen_file)
    sn_list.append(name)

In [6]:
# Convert every low-z light-curve file into 'data_pantheon/sn<name>.dat':
# a two-line header (name z ra dec) followed by the photometry table.

# Output band-name prefix for each filter-code string read from the 'FILTERS:' header line
band_prefixes = {
    'abcde': '4Shooter2',
    'fghij': 'p1',  # Keplercam
    'klmno': 'p1',
    'pqrs': 'p2',
    'tuvwxyzA': 'csp',
    'BCDEF': 'Standard',
    'GHIJK': 'Standard',
}
lowz_columns = ['mjd', 'band', 'field', 'flux', 'flux_err', 'mag', 'mag_err']
output_columns = ['mjd', 'flux', 'flux_err', 'zp', 'band', 'mag_sys']

for file, name in zip(lowz_files, sn_list):

    # check where the light curves start
    # (reset per file so a header without these keywords cannot silently reuse
    # the values found in the previous SN's file)
    num_rows = filter_code = filter_set = None
    with open(file) as search:
        for i, line in enumerate(search):
            if 'VARLIST:' in line:
                num_rows = i
            if 'FILTERS:' in line:
                tokens = line.split()
                filter_code = tokens[-1]  # one character per band, as used in the table
                filter_set = tokens[-3]   # matching band letters, same order as filter_code
    if num_rows is None or filter_code is None:
        raise ValueError(f"{file}: 'VARLIST:' and/or 'FILTERS:' line not found")

    # sn info
    sn_info = pantheon_params[pantheon_params.CID == name]
    ra = sn_info.RA.values[0]
    dec = sn_info.DECL.values[0]
    z = sn_info.zCMB.values[0]

    # sep=r'\s+' replaces the deprecated delim_whitespace=True (same parsing);
    # rows without a magnitude are dropped
    lc = pd.read_csv(file, skiprows=num_rows + 1, sep=r'\s+',
                     names=lowz_columns).dropna(subset=['mag'])
    lc['zp'] = 27.5
    lc['mag_sys'] = 'BD17'

    prefix = band_prefixes.get(filter_code)
    for band in lc.band.unique():
        band_index = filter_code.index(band)
        new_band = filter_set[band_index]

        if prefix is None:
            # just in case I am missing any set of filters
            print(name)
        else:
            lc.loc[lc.band == band, 'band'] = f'{prefix}_{new_band}'

    lc['flux'] = lc['flux'].round(3)
    lc['flux_err'] = lc['flux_err'].round(3)

    sn_file_name = f'data_pantheon/sn{name}.dat'
    # separate handle name so the loop variable `file` is not overwritten
    with open(sn_file_name, 'w') as out_file:
        out_file.write('name z ra dec\n')
        out_file.write(f'sn{name} {z} {ra} {dec} \n')
    lc[output_columns].to_csv(sn_file_name, index=False, sep=' ', mode='a')

## SDSS (335 SNe)

In [7]:
# Read the SDSS header table and keep, for every SN that is in the Pantheon SDSS
# sample, a dict {column name: value} stored under its SNID.
sdss_head_file = 'test_files/Pantheon/lightcurves/JLA2014_SDSS_DS17/JLA2014_SDSS_DS17_HEAD.FITS'

# context manager so the FITS file is closed once the values have been copied out
with fits.open(sdss_head_file) as hdul_head:
    head_table = hdul_head[1].data
    head_columns = head_table.dtype.names

    # column name -> position inside a table row
    index_dict = {col_name: i for i, col_name in enumerate(head_columns)}

    sdss_ids = sdss.CID.values  # hoisted: does not change inside the loop
    sn_dict = {}
    for sn_info in head_table:
        sn_name = sn_info[index_dict['SNID']]
        if sn_name in sdss_ids:
            sn_dict[sn_name] = {col_name: sn_info[index_dict[col_name]] for col_name in head_columns}

In [8]:
# Write one 'data_pantheon/SDSS<name>.dat' file per SDSS SN: a two-line header
# (name z ra dec) followed by the photometry sorted by band and then by mjd.
sorter = ['sdss_u', 'sdss_g', 'sdss_r', 'sdss_i', 'sdss_z']
sorterIndex = dict(zip(sorter, range(len(sorter))))  # band name -> sort rank (loop-invariant)
output_columns = ['mjd', 'flux', 'flux_err', 'zp', 'band', 'mag_sys']

sdss_phot_file = 'test_files/Pantheon/lightcurves/JLA2014_SDSS_DS17/JLA2014_SDSS_DS17_PHOT.FITS'

# context manager so the FITS file is closed after all light curves are written
with fits.open(sdss_phot_file) as hdul_lc:
    phot_table = hdul_lc[1].data

    # column name -> position inside a table row
    index_dict = {col_name: i for i, col_name in enumerate(phot_table.dtype.names)}
    lc_indexes = list(index_dict.values())
    column_names = list(index_dict.keys())

    sn_names = list(sn_dict.keys())
    for sn_name in sn_names:

        # sn info
        # PTROBS_MIN is shifted by one to turn the header pointers into a Python slice
        imin = sn_dict[sn_name]['PTROBS_MIN'] - 1
        imax = sn_dict[sn_name]['PTROBS_MAX']
        ra = sn_dict[sn_name]['RA']
        dec = sn_dict[sn_name]['DECL']
        z = sn_dict[sn_name]['REDSHIFT_FINAL']

        sn_lc = phot_table[imin:imax]
        # NOTE(review): set() discards the original row order (restored by the sort
        # below) and is presumably meant to drop duplicated rows — confirm.
        sn_lc = np.array(list(set(sn_lc))).T  # turn tuplets into array

        lc_df = pd.DataFrame(data=sn_lc[lc_indexes].T, columns=column_names)
        lc_df = lc_df.rename(columns={'MJD': 'mjd', 'FLUXCAL': 'flux', 'FLUXCALERR': 'flux_err', 'FLT': 'band'})
        lc_df['band'] = 'sdss_' + lc_df['band'].str.lower()  # some bands are in upper case (don't know why)
        lc_df['zp'] = 27.5
        lc_df['mag_sys'] = 'AB'

        # sort bands
        # drop(columns=...) instead of drop('Rank', 1): pandas 2.x no longer accepts
        # the axis as a positional argument
        lc_df = (
            lc_df
            .assign(Rank=lc_df['band'].map(sorterIndex))
            .sort_values(['Rank', 'mjd'], ascending=[True, True])
            .drop(columns='Rank')
        )

        sn_file_name = f'data_pantheon/SDSS{sn_name}.dat'
        with open(sn_file_name, 'w') as out_file:
            out_file.write('name z ra dec\n')
            out_file.write(f'SDSS{sn_name} {z} {ra} {dec} \n')
        lc_df[output_columns].to_csv(sn_file_name, index=False, sep=' ', mode='a')

## SNLS (236 SNe)

The zero point listed in the light curve files is wrong; the correct value is always 27.5.

In [9]:
# Build two parallel lists: the light-curve file of each SNLS SN and the SN name.
snls_files = []
sn_list = []
sample_directory = 'test_files/Pantheon/lightcurves/JLA2014_SNLS_DS17/*'

# List the directory once instead of once per SN; sorted because glob returns
# matches in arbitrary order and only the first match is used.
candidate_files = sorted(glob.glob(sample_directory))

for name in snls.CID:
    # NOTE(review): substring match on the whole path — confirm no SN name is
    # contained in another file's name.
    matches = [path for path in candidate_files if name in path]
    if not matches:
        raise FileNotFoundError(f'no light-curve file found for {name} in {sample_directory}')
    snls_files.append(matches[0])
    sn_list.append(name)

In [10]:
# Convert every SNLS light-curve file into 'data_pantheon/<name>.dat':
# a two-line header (name z ra dec) followed by the photometry table.
snls_columns = ['mjd', 'band', 'field', 'flux', 'flux_err', 'snr', 'mag', 'mag_err', 'zpt']
output_columns = ['mjd', 'flux', 'flux_err', 'zp', 'band', 'mag_sys']

for file, name in zip(snls_files, sn_list):

    # check where the light curves start
    # (reset per file so a missing 'VARLIST:' line cannot reuse the previous file's value)
    num_rows = None
    with open(file) as search:
        for i, line in enumerate(search):
            if 'VARLIST:' in line:
                num_rows = i
    if num_rows is None:
        raise ValueError(f"{file}: 'VARLIST:' line not found")

    # sn info
    sn_info = pantheon_params[pantheon_params.CID == name]
    ra = sn_info.RA.values[0]
    dec = sn_info.DECL.values[0]
    z = sn_info.zCMB.values[0]

    # sep=r'\s+' replaces the deprecated delim_whitespace=True (same parsing);
    # rows without a magnitude are dropped
    lc = pd.read_csv(file, skiprows=num_rows + 1, sep=r'\s+',
                     names=snls_columns).dropna(subset=['mag'])
    lc['zp'] = 27.5  # fixed value; the 'zpt' column read from the file is not used
    lc['mag_sys'] = 'AB'
    lc['band'] = 'Megacam_' + lc['band'].astype(str)
    lc[['flux', 'flux_err']] = lc[['flux', 'flux_err']].round(3)

    sn_file_name = f'data_pantheon/{name}.dat'
    # separate handle name so the loop variable `file` is not overwritten
    with open(sn_file_name, 'w') as out_file:
        out_file.write('name z ra dec\n')
        out_file.write(f'{name} {z} {ra} {dec} \n')
    lc[output_columns].to_csv(sn_file_name, index=False, sep=' ', mode='a')

## PAN-STARRS (279 SNe)

In [11]:
# Build two parallel lists: the light-curve file of each Pan-STARRS SN and the SN name.
ps1_files = []
sn_list = []
sample_directory = 'test_files/Pantheon/lightcurves/Pantheon_PS1MD_TEXT/*.txt'

# List the directory once instead of once per SN; sorted because glob returns
# matches in arbitrary order and only the first match is used.
candidate_files = sorted(glob.glob(sample_directory))

for name in ps1.CID:
    # NOTE(review): substring match on the whole path — an ID that is a prefix of
    # another ID would match both files; confirm against the file naming scheme.
    matches = [path for path in candidate_files if name in path]
    if not matches:
        raise FileNotFoundError(f'no light-curve file found for {name} in {sample_directory}')
    ps1_files.append(matches[0])
    sn_list.append(name)

In [12]:
# Convert every Pan-STARRS light-curve file into 'data_pantheon/psc<name>.dat':
# a two-line header (name z ra dec) followed by the photometry table.
ps1_columns = ['mjd', 'band', 'field', 'flux', 'flux_err', 'mag', 'mag_err']
output_columns = ['mjd', 'flux', 'flux_err', 'zp', 'band', 'mag_sys']

for file, name in zip(ps1_files, sn_list):

    # check where the light curves start
    # (reset per file so a missing 'VARLIST:' line cannot reuse the previous file's value)
    num_rows = None
    with open(file) as search:
        for i, line in enumerate(search):
            if 'VARLIST:' in line:
                num_rows = i
    if num_rows is None:
        raise ValueError(f"{file}: 'VARLIST:' line not found")

    # sn info
    sn_info = pantheon_params[pantheon_params.CID == name]
    ra = sn_info.RA.values[0]
    dec = sn_info.DECL.values[0]
    z = sn_info.zCMB.values[0]

    # sep=r'\s+' replaces the deprecated delim_whitespace=True (same parsing);
    # rows without a magnitude are dropped
    lc = pd.read_csv(file, skiprows=num_rows + 1, sep=r'\s+',
                     names=ps1_columns).dropna(subset=['mag'])
    lc['zp'] = 27.5
    lc['mag_sys'] = 'AB'
    lc['band'] = 'ps1_' + lc['band'].astype(str)
    lc[['flux', 'flux_err']] = lc[['flux', 'flux_err']].round(3)

    sn_file_name = f'data_pantheon/psc{name}.dat'
    # separate handle name so the loop variable `file` is not overwritten
    with open(sn_file_name, 'w') as out_file:
        out_file.write('name z ra dec\n')
        out_file.write(f'psc{name} {z} {ra} {dec} \n')
    lc[output_columns].to_csv(sn_file_name, index=False, sep=' ', mode='a')

## HST (26 SNe)

HST seems to be in AB magnitude system (I am not sure about SNAP or CANDELS though). Only sn97ff (light curve file) says it is in AB magnitude system.

SN colfax has three files. They are the same, but with slightly different redshift. However, the first file matches with the redshift in the Pantheon parameters file (zCMB), so we use that one.

In [13]:
# Build two parallel lists: the light-curve file of each HST SN and the SN name.
hst_files = []
sn_list = []
sample_directory = 'test_files/Pantheon/lightcurves/Pantheon_HST_TEXT/*'

# List the directory once instead of once per SN. Sorted because glob returns
# matches in arbitrary order and only the first match is used, which matters
# for a SN with several files.
# NOTE(review): confirm that the alphabetically first file is the intended one
# when a SN has more than one file.
candidate_files = sorted(glob.glob(sample_directory))

for name in hst.CID:
    # case-insensitive substring match of the SN name against the file path
    matches = [path for path in candidate_files if name.lower() in path.lower()]
    if not matches:
        raise FileNotFoundError(f'no light-curve file found for {name} in {sample_directory}')
    hst_files.append(matches[0])
    sn_list.append(name)

In [14]:
# Convert every HST light-curve file into 'data_pantheon/<name>.dat':
# a two-line header (name z ra dec) followed by the photometry table.

# Character found in the 'FILTERS:' header line -> output band name
hst_filters = {'0':'F110W_NIC2', '1':'F160W_NIC2', '2':'F606W_ACS', '3':'F675W_WFPC2', 
               '4':'F775W_ACS',  '5':'F814W_WFPC2', '6':'F850LP_ACS', '7':'F850lp_WFPC2',
               'B':'ACS_WFC_F435W',   'G':'ACS_WFC_F475W',   'V':'ACS_WFC_F606W',   'R':'ACS_WFC_F625W', 
               'X':'ACS_WFC_F775W',   'I':'ACS_WFC_F814W',   'Z':'ACS_WFC_F850LP',  'S':'WFC3_UVIS_F225W',
               'T':'WFC3_UVIS_F275W', 'U':'WFC3_UVIS_F336W', 'C':'WFC3_UVIS_F390W', 'W':'WFC3_UVIS_F350LP',
               'Y':'WFC3_IR_F105W',   'M':'WFC3_IR_F110W',   'J':'WFC3_IR_F125W',   'N':'WFC3_IR_F140W',
               'H':'WFC3_IR_F160W',   'L':'WFC3_IR_F098M',   'O':'WFC3_IR_F127M',   'P':'WFC3_IR_F139M', 
               'Q':'WFC3_IR_F153M'}

# Column layouts of the two file flavours
scp_columns = ['mjd', 'band', 'field', 'flux', 'flux_err']
default_columns = ['VAR', 'mjd', 'band', 'field', 'flux', 'flux_err', 'mag', 'mag_err', 'NaN']
output_columns = ['mjd', 'flux', 'flux_err', 'zp', 'band', 'mag_sys']

for file, name in zip(hst_files, sn_list):

    # check where the light curves start
    # (reset per file so a header without these keywords cannot silently reuse
    # the values found in the previous SN's file)
    num_rows = filter_code = filter_set = None
    with open(file) as search:
        for i, line in enumerate(search):
            if 'VARLIST:' in line:
                num_rows = i
            if 'FILTERS:' in line:
                tokens = line.split()
                filter_code = tokens[-1]  # one character per band, as used in the table
                filter_set = tokens[-3]   # matching keys of hst_filters, same order
    if num_rows is None or filter_code is None:
        raise ValueError(f"{file}: 'VARLIST:' and/or 'FILTERS:' line not found")

    # sn info
    sn_info = pantheon_params[pantheon_params.CID == name]
    ra = sn_info.RA.values[0]
    dec = sn_info.DECL.values[0]
    z = sn_info.zCMB.values[0]

    # skipfooter is only supported by the python engine; requesting it explicitly
    # avoids the ParserWarning about falling back from the C engine.
    # sep=r'\s+' replaces the deprecated delim_whitespace=True (same parsing).
    columns = scp_columns if 'SCP' in name else default_columns
    lc = pd.read_csv(file, skiprows=num_rows + 1, sep=r'\s+', skipfooter=1,
                     engine='python', names=columns)

    lc['zp'] = 27.5
    lc['mag_sys'] = 'AB'
    lc[['flux', 'flux_err']] = lc[['flux', 'flux_err']].round(3)

    # translate each one-character band code into its filter name
    for band in lc.band.unique():
        band_index = filter_code.index(band)
        new_band = hst_filters[filter_set[band_index]]
        lc.loc[lc.band == band, 'band'] = new_band

    sn_file_name = f'data_pantheon/{name}.dat'
    # separate handle name so the loop variable `file` is not overwritten
    with open(sn_file_name, 'w') as out_file:
        out_file.write('name z ra dec\n')
        out_file.write(f'{name} {z} {ra} {dec} \n')
    lc[output_columns].to_csv(sn_file_name, index=False, sep=' ', mode='a')

  lc = pd.read_csv(file, skiprows=num_rows+1, delim_whitespace=True, skipfooter=1,
  lc = pd.read_csv(file, skiprows=num_rows+1, delim_whitespace=True, skipfooter=1,


___
___
___
## Extra info