# Gen New SIMLIB

Generate a new SIMLIB file for SNANA from an existing one with a chosen cadence.

Because the data come from real surveys, the information about the sky and the filters (noise, signal, PSF, etc.) is only available at the exact epochs that were actually observed. To generate new observations at "non-existing" epochs, the values for each new epoch are therefore drawn at random from the existing epochs of the same band.

In [2]:
import pandas as pd
import numpy as np
import random
import glob
import os

In [3]:
def _find_lib_bounds(simlib_file):
    """Locate every LIBID block of a SIMLIB file.

    Returns three parallel lists: the line index of each ``LIBID:`` line, the
    line index of each ``END_LIBID:`` line, and the library identifiers (the
    token following ``LIBID:``).
    """
    start_indexes = []
    end_indexes = []
    lib_list = []

    # Some SIMLIB files might have encoding errors.
    # By ignoring them there could be some data loss.
    with open(simlib_file, errors='ignore') as file:
        for i, line in enumerate(file):
            tokens = line.split()
            if 'LIBID:' in tokens:
                start_indexes.append(i)
                lib_list.append(tokens[tokens.index('LIBID:') + 1])
            if 'END_LIBID:' in tokens:
                end_indexes.append(i)

    return start_indexes, end_indexes, lib_list


def _resample_lib(sim_df, mjd_arrange, names):
    """Build the new observation table of one library.

    For every band (in order of first appearance) and every epoch in
    ``mjd_arrange``, a random existing observation of that band is copied and
    its ``MJD`` / ``IDUM`` are overwritten with the new epoch and a running
    counter starting at 1.
    """
    pieces = []
    idum = 1
    for band in sim_df['BAND'].unique():
        band_df = sim_df[sim_df['BAND'] == band]
        # pick random epoch properties, one draw per new epoch
        picks = [random.randint(0, len(band_df) - 1) for _ in mjd_arrange]
        band_new = band_df.iloc[picks].copy()
        band_new['MJD'] = mjd_arrange
        band_new['IDUM'] = np.arange(idum, idum + len(picks))
        pieces.append(band_new)
        idum += len(picks)

    if not pieces:
        return pd.DataFrame(columns=names)
    # A single concat instead of row-by-row DataFrame.append (removed in
    # pandas 2.0 and quadratic in the number of rows).
    return pd.concat(pieces)[names]


def generate_simlib(simlib_file, output_file, cad=5, nlibs=None,
                        add_uncertainty=False, xunc=None, 
                            add_randomness=False, err=None):
    """Write a copy of a SIMLIB file resampled to a regular cadence.

    Each processed LIBID block keeps its header lines, gets its ``NOBS:``
    value replaced, and has its ``S:`` observation rows replaced by new rows
    spaced ``cad`` days apart between the (truncated) first and last MJD of
    the block. The properties of every new row are copied from a randomly
    chosen existing observation in the same band.

    Parameters
    ----------
    simlib_file : str
        Path of the input SIMLIB file. If the path contains ``'lowz'``
        (case-insensitive), two trailing lines per block are assumed instead
        of one when locating the observation rows.
    output_file : str
        Path of the SIMLIB file to create (overwritten if it exists).
    cad : int or float, default 5
        Cadence, i.e. step between consecutive new epochs.
    nlibs : int, optional
        Number of leading LIBID blocks to process and keep; the remaining
        blocks are dropped from the output. ``None`` means all blocks. Values
        larger than the number of blocks in the file are clamped.
    add_uncertainty : bool, default False
        If true, the ``SKYSIG`` column is multiplied by ``xunc``.
    xunc : float, optional
        Multiplicative factor for ``SKYSIG``; required when
        ``add_uncertainty`` is true.
    add_randomness : bool, default False
        If true, each new epoch is shifted by a random integer in
        ``[-err, err]``.
    err : int, optional
        Maximum shift; required when ``add_randomness`` is true.

    Raises
    ------
    ValueError
        If a required ``xunc`` / ``err`` is missing, ``nlibs`` is smaller
        than 1, or the file has no or unbalanced LIBID blocks.
    """
    if add_randomness and err is None:
        raise ValueError("'err' must be given when add_randomness=True")
    if add_uncertainty and xunc is None:
        raise ValueError("'xunc' must be given when add_uncertainty=True")

    start_indexes, end_indexes, lib_list = _find_lib_bounds(simlib_file)
    if not lib_list:
        raise ValueError(f"no 'LIBID:' entries found in {simlib_file}")
    if len(start_indexes) != len(end_indexes):
        raise ValueError("number of 'LIBID:' and 'END_LIBID:' lines differ")

    if nlibs is None:
        nlibs = len(lib_list)  # all libs
    elif nlibs < 1:
        raise ValueError("'nlibs' must be at least 1")
    else:
        nlibs = min(nlibs, len(lib_list))

    names = ['S', 'MJD', 'IDUM', 'BAND', 'GAIN', 'RDNOISE', 'SKYSIG',
             'PSF1', 'PSF2', 'PSFRAT', 'ZP', 'ZPERR', 'MAG']
    header_lines = 4
    tail_lines = 2 if 'lowz' in simlib_file.lower() else 1

    sim_dict = {}   # libid -> resampled observation table
    nobs_dict = {}  # libid -> new number of observations

    #### LIB dataframes ####
    for i in range(nlibs):
        # select the observation rows of this LIB
        skiprows = start_indexes[i] + header_lines
        nrows = (end_indexes[i] - start_indexes[i]) - (header_lines + tail_lines)
        sim_df = pd.read_csv(simlib_file, names=names, skiprows=skiprows,
                             nrows=nrows, comment='#', sep=r'\s+')
        sim_df = sim_df.dropna(axis='index')  # some simlib files have SNe with data from 2 fields(?)

        ##### pick cadence ####
        min_mjd = np.trunc(float(sim_df['MJD'].min()))
        max_mjd = np.trunc(float(sim_df['MJD'].max()))
        mjd_arrange = np.arange(min_mjd, max_mjd + cad, cad)
        nobs_dict[lib_list[i]] = len(mjd_arrange) * len(sim_df['BAND'].unique())  # includes all bands

        # add randomness to the cadence
        if add_randomness:
            mjd_arrange = np.asarray([mjd + random.randint(-err, err) for mjd in mjd_arrange])

        # scale the sky noise
        if add_uncertainty:
            sim_df['SKYSIG'] = sim_df['SKYSIG'].values * xunc

        sim_dict[lib_list[i]] = _resample_lib(sim_df, mjd_arrange, names)

    #### write output file ####
    last_kept_line = end_indexes[nlibs - 1]
    last_lib_line = end_indexes[-1]
    current_nobs = None

    with open(simlib_file, "rt", errors='ignore') as fin, \
            open(output_file, "wt") as fout:
        for i, line in enumerate(fin):
            # drop the LIBID blocks beyond the first `nlibs`, keep the file tail
            if last_kept_line < i <= last_lib_line:
                continue

            tokens = line.split()

            # skip lines starting with 'S:', i.e., the ones with the observations info
            if 'S:' in tokens:
                continue

            if 'LIBID:' in tokens:
                libid = tokens[tokens.index('LIBID:') + 1]
                current_nobs = nobs_dict.get(libid, current_nobs)

            # modify number of observations
            if 'NOBS:' in tokens and current_nobs is not None:
                tokens[tokens.index('NOBS:') + 1] = str(current_nobs)
                line = '   '.join(tokens) + '\n'

            # write the new observations right before the END_LIBID line;
            # everything goes through `fout` only, so rows are written once
            if 'END_LIBID:' in tokens:
                new_df = sim_dict.get(tokens[-1])
                if new_df is not None:
                    for df_vals in new_df[names].values:
                        fout.write(' '.join(str(val) for val in df_vals) + '\n')

            fout.write(line)

    print('Ready!')

In [5]:
# LOWZ sample (430 libs): write one resampled SIMLIB per cadence,
# processing every LIBID block of the input file.
simlib_file = '/media/data1/muller/SNANA/test/LOWZ/PS1_LOWZ_COMBINED.SIMLIB'
cadences = [1, 3, 5, 7]

for cad in cadences:
    # the cadence is encoded in the output file name
    output_file = f'/media/data1/muller/SNANA/test/LOWZ/LOWZ_{cad}DCAD.SIMLIB'
    generate_simlib(simlib_file, output_file, cad=cad)

Ready!
Ready!
Ready!
Ready!


In [4]:
# SDSS sample (2000 libs): write one resampled SIMLIB per cadence,
# keeping only the first 500 LIBID blocks of the input file.
simlib_file = '/media/data1/muller/SNANA/test/SDSS/SDSS_3year.SIMLIB'
cadences = [1, 3, 5, 7]

for cad in cadences:
    # the cadence is encoded in the output file name
    output_file = f'/media/data1/muller/SNANA/test/SDSS/SDSS_{cad}DCAD.SIMLIB'
    generate_simlib(simlib_file, output_file, cad=cad, nlibs=500)

Ready!
Ready!
Ready!
Ready!


In [27]:
# SNLS sample (69 libs): write one resampled SIMLIB per cadence,
# processing every LIBID block of the input file.
simlib_file = '/media/data1/muller/SNANA/test/SNLS/SNLS_Ast06.SIMLIB.COADD'
cadences = [1, 3, 5, 7]

for cad in cadences:
    # the cadence is encoded in the output file name
    output_file = f'/media/data1/muller/SNANA/test/SNLS/SNLS_{cad}DCAD.SIMLIB'
    generate_simlib(simlib_file, output_file, cad=cad)

Ready!


In [28]:
# PS1MD sample (10 libs): write one resampled SIMLIB per cadence,
# with the SKYSIG column rescaled by `xunc`.
# NOTE(review): generate_simlib multiplies SKYSIG by xunc, so xunc=0.2
# lowers the sky noise rather than raising it -- confirm this is intended.
simlib_file = '/media/data1/muller/SNANA/test/PS1/PS1MD.simlib'
cadences = [1, 3, 5, 7]

for cad in cadences:
    # the cadence is encoded in the output file name
    output_file = f'/media/data1/muller/SNANA/test/PS1/PS1_{cad}DCAD.SIMLIB'
    generate_simlib(simlib_file, output_file, cad=cad, add_uncertainty=True, xunc=0.2)

Ready!
