# Light curve stats
This notebook computes several useful light curve properties for use in later notebooks.  Qunatities such as number of  points above a certain S/N in each season, variability, identifying the most variable season, peak magnitudes in each filter, number of pre-explosion non-detection points, day of first detection, etc.

In [1]:
import sys
import os.path
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit

In [20]:
BASEDIR = '/Users/masao/dessn/'   # edit this to point to your root directory

# Workflow

* Read data from PSNID output.

  * `BASEDIR + 'DES_ALL/DESALL_forcePhoto_real_snana_fits/FITOPT000.FITRES'`
  * `BASEDIR + 'DES_ALL/DESALL_forcePhoto_real_snana_fits/FITOPT000.LCPLOT'`
  * These files can be retrived from Midway: 
  
  `/scratch/midway2/masao/psnid_desdata/DES_ALL/DESALL_forcePhoto_real_snana_fits/`
  

* and another file with the SN candidate sky coordinates.

  * `BASEDIR + 'CIDcoord.csv'` 
  

* Compute:

  * number of S/N > 3 epochs each year `nSNR3_YX`
  * number of S/N > 5 epochs each year `nSNR5_YX`
  * tagging of year with largest number of epochs `yearSNR3` and `yearSNR5`
  * flagging single and adjacent-year variability `flagSNR3` and `flagSNR5`
  * cumulative SNR in each season `cumSNR_YX` and `cumSNRsgn_YX` (signed; negative flux gets negative SNR)
  * fits each season's light curve with a straight line and compute significance of offset `VAR0_YX` and slope `VAR1_YX`
  * peak magnitudes (all data) `minmag_{griz}`
  * peak magnitudes and MJD in dominant year `peakmag_{griz}` and `peakmjd_{griz}`
  * number of pre-explosion non-detection points `nnnondet_{griz}`
  * day of discovery (S/N>5) in days from August 1 of that season `firstdetday_{griz}`
  * to avoid rerunning, each step runs if the output file `LC_*` doesn't exist; otherwise it reads and uses it
    * `LC_SNRinfo.csv`
    * `LC_VARinfo.csv`
    * `LC_PeakInfo.csv`
    * `LC_NnonDetinfo.csv`
    * `LC_MinMagsinfo.csv`
    * `LC_FirstDetDays.csv`
  * final results are saved in `LC_MergedOutput.csv`

In [2]:
# August 1 - March 1 of years 2012-2018
mjdrangedata = {'year': [0,1,2,3,4,5],
                'lo': [56140.0, 56505.0, 56870.0, 57235.0, 57601.0, 57966.0],
                'hi': [56352.0, 56717.0, 57082.0, 57447.0, 57813.0, 58178.0]}
mjdrange = pd.DataFrame(mjdrangedata, columns = ['year', 'lo', 'hi'])

## SNR info

In [3]:
def getSNR(lcdata, snrmin):
    
    # Determine the number of light curve measurements in each season [0-5] with SNR > snrmin
    # Input:
    #   lcdata = light curve dataframe
    #   snrmin = mininum SNR
    # Output:
    #   nSNR     = number of measurements above snrmin [0-5]
    #   year     = year with largest nSNR
    #   thisflag = nSNR>=5 in single season (1), adjacent seasons (2), otherwise (0)
    #   cumSNR   = cumulative SNR [0-5]
    

    nSNR = []
    cumSNR = []
    cumSNRsgn = []
    year = -1
    yearmax = -1

    for y in range(len(mjdrange)):

        # all positive fluxes in a given year
        lc    = lcdata[(lcdata['MJD'] > mjdrange['lo'][y])
                     & (lcdata['MJD'] < mjdrange['hi'][y])
                     & (lcdata['FLUXERR'] > 0.0)]
        
        # (S/N)^^2
        snr2 = (lc['FLUX']/lc['FLUXERR'])**2
        tot  = snr2.sum()
        cum  = np.sqrt(tot)

        snr2sgn = (np.sign(lc['FLUX']))*(lc['FLUX']/lc['FLUXERR'])**2
        totsgn  = snr2sgn.sum()
        cumsgn  = np.sign(totsgn)*np.sqrt(np.absolute(totsgn))
        
        lcsnr = lc[lc['FLUX']/lc['FLUXERR'] > snrmin]
        nlc = len(lcsnr)

        if (nlc > yearmax) & (nlc > 0):
            year    = mjdrange['year'][y]
            yearmax = nlc
        
        nSNR.append(nlc)
        cumSNR.append(cum)
        cumSNRsgn.append(cumsgn)
     
    # single-season and 2-season transients in adjacent years
    flag = np.zeros(6,dtype=int)

    flag[0] = nSNR[0] >= 5
    flag[1] = nSNR[1] >= 5
    flag[2] = nSNR[2] >= 5
    flag[3] = nSNR[3] >= 5
    flag[4] = nSNR[4] >= 5
    flag[5] = nSNR[5] >= 5
    flag[1] = 2*flag[1]
    flag[2] = 4*flag[2]
    flag[3] = 8*flag[3]
    flag[4] = 16*flag[4]
    flag[5] = 32*flag[5]
    sum_f = np.sum(flag)

    if (sum_f == 1) | (sum_f == 2) | (sum_f == 4) | (sum_f == 8) | (sum_f == 16) | (sum_f == 32):
        # single season transient
        thisflag = 1
    elif (sum_f == 3) | (sum_f == 6) | (sum_f == 12) | (sum_f == 24) | (sum_f == 48):
        # 2-season transient in adjacent years
        thisflag = 2
    else:
        thisflag = 0
    
    return nSNR, year, thisflag, cumSNR, cumSNRsgn

In [4]:
def getSNRinfo(canddf, lcdf):

    dt = np.dtype([
        ('CID', np.int64),
        ('nSNR3_Y0', np.int8), 
        ('nSNR3_Y1', np.int8), 
        ('nSNR3_Y2', np.int8), 
        ('nSNR3_Y3', np.int8), 
        ('nSNR3_Y4', np.int8), 
        ('nSNR3_Y5', np.int8), 
        ('yearSNR3', np.int8),
        ('flagSNR3', np.int8),
        ('nSNR5_Y0', np.int8), 
        ('nSNR5_Y1', np.int8), 
        ('nSNR5_Y2', np.int8), 
        ('nSNR5_Y3', np.int8), 
        ('nSNR5_Y4', np.int8), 
        ('nSNR5_Y5', np.int8), 
        ('yearSNR5', np.int8), 
        ('flagSNR5', np.int8),
        ('cumSNR_Y0', np.float),
        ('cumSNR_Y1', np.float),
        ('cumSNR_Y2', np.float),
        ('cumSNR_Y3', np.float),
        ('cumSNR_Y4', np.float),
        ('cumSNR_Y5', np.float),
        ('cumSNRsgn_Y0', np.float),
        ('cumSNRsgn_Y1', np.float),
        ('cumSNRsgn_Y2', np.float),
        ('cumSNRsgn_Y3', np.float),
        ('cumSNRsgn_Y4', np.float),
        ('cumSNRsgn_Y5', np.float)
    ])
    d = np.empty(0, dtype=dt)
    df = pd.DataFrame(d)

    lcid = []
    lnsnr3 = []
    
    cidlist = canddf['CID']

    for i in range(len(cidlist)):
    #for i in range(0,10):

        print("%d / %d" %(i+1, len(cidlist)), end="\r")
        cid=cidlist[i]
        lcinfo = lcdf[(lcdf['CID']==cid)]

        # number of SNR > 3, 5 points, year with largest nSNR, and 1-/2-season flag
        nSNR3, yearSNR3, flagSNR3, cumSNR, cumSNRsgn = getSNR(lcinfo, 3.0)
        nSNR5, yearSNR5, flagSNR5, cumSNR, cumSNRsgn = getSNR(lcinfo, 5.0)
        #print(nSNR3, yearSNR3, nSNR3[yearSNR3])
        #print(nSNR5, yearSNR5, nSNR5[yearSNR5])

        df = df.append([{'CID': cid,
                         'nSNR3_Y0': nSNR3[0],
                         'nSNR3_Y1': nSNR3[1],
                         'nSNR3_Y2': nSNR3[2],
                         'nSNR3_Y3': nSNR3[3],
                         'nSNR3_Y4': nSNR3[4],
                         'nSNR3_Y5': nSNR3[5],
                         'yearSNR3': yearSNR3,
                         'flagSNR3': flagSNR3,
                         'nSNR5_Y0': nSNR5[0],
                         'nSNR5_Y1': nSNR5[1],
                         'nSNR5_Y2': nSNR5[2],
                         'nSNR5_Y3': nSNR5[3],
                         'nSNR5_Y4': nSNR5[4],
                         'nSNR5_Y5': nSNR5[5],
                         'yearSNR5': yearSNR5,
                         'flagSNR5': flagSNR5,
                         'cumSNR_Y0': cumSNR[0],
                         'cumSNR_Y1': cumSNR[1],
                         'cumSNR_Y2': cumSNR[2],
                         'cumSNR_Y3': cumSNR[3],
                         'cumSNR_Y4': cumSNR[4],
                         'cumSNR_Y5': cumSNR[5],
                         'cumSNRsgn_Y0': cumSNRsgn[0],
                         'cumSNRsgn_Y1': cumSNRsgn[1],
                         'cumSNRsgn_Y2': cumSNRsgn[2],
                         'cumSNRsgn_Y3': cumSNRsgn[3],
                         'cumSNRsgn_Y4': cumSNRsgn[4],
                         'cumSNRsgn_Y5': cumSNRsgn[5]
                        }], ignore_index=True)
        
    return df

## Variability parameter

In [5]:
def line(x, a, b):
    return a + b*x

In [6]:
def quadratic(x, a, b, c):
    return a + b*x + c*x*x

In [7]:
def cubic(x, a, b, c, d):
    return a + b*x + c*x*x + d*x*x*x

In [8]:
def getVAR(lcdata):
    
    # Calculate variability parameter
    # Input:
    #   lcdata = light curve dataframe
    # Output:
    #   var    = 
    

    filters = ["g", "r", "i", "z"]
    
    varPar0 = []
    varPar1 = []
    varParX = []

    for y in range(len(mjdrange)):
            
        # all positive flux errors in a given year
        lc    = lcdata[(lcdata['MJD'] > mjdrange['lo'][y])
                     & (lcdata['MJD'] < mjdrange['hi'][y])
                     & (lcdata['FLUXERR'] > 0.0)]

        cum0SNR2 = 0.0
        cum1SNR2 = 0.0
        cumXSNR2 = 0.0
        
        for f in range(len(filters)):

            xd = lc[lc['BAND']==filters[f]]['MJD']
            xd = xd - mjdrange['lo'][y]
            yd = lc[lc['BAND']==filters[f]]['FLUX']
            e  = lc[lc['BAND']==filters[f]]['FLUXERR']
        
            if len(yd) > 3:
#                popt, pcov = curve_fit(cubic, xd, yd, sigma=e)
                popt, pcov = curve_fit(line, xd, yd, sigma=e)
        
                # S/N of coefficients
                snr0 = np.absolute(popt[0]/(pcov[0,0]**0.5))
                snr1 = np.absolute(popt[1]/(pcov[1,1]**0.5))
            else:
                snr0 = 0.0
                snr1 = 0.0
                
            cum0SNR2 += snr0**2
            cum1SNR2 += snr1**2
            cumXSNR2 += snr0**2 + snr1**2
    
        cum0SNR = np.sqrt(cum0SNR2)
        cum1SNR = np.sqrt(cum1SNR2)
        cumXSNR = np.sqrt(cumXSNR2)

        varPar0.append(cum0SNR)
        varPar1.append(cum1SNR)
        varParX.append(cumXSNR)

    return varPar0, varPar1, varParX

In [9]:
def getVARinfo(canddf, lcdf):

    dt = np.dtype([
        ('CID', np.int64),
        ('VAR0_Y0', np.float),
        ('VAR0_Y1', np.float),
        ('VAR0_Y2', np.float),
        ('VAR0_Y3', np.float),
        ('VAR0_Y4', np.float),
        ('VAR0_Y5', np.float),
        ('VAR1_Y0', np.float),
        ('VAR1_Y1', np.float),
        ('VAR1_Y2', np.float),
        ('VAR1_Y3', np.float),
        ('VAR1_Y4', np.float),
        ('VAR1_Y5', np.float),
        ('VARX_Y0', np.float),
        ('VARX_Y1', np.float),
        ('VARX_Y2', np.float),
        ('VARX_Y3', np.float),
        ('VARX_Y4', np.float),
        ('VARX_Y5', np.float)
    ])
    d = np.empty(0, dtype=dt)
    df = pd.DataFrame(d)

    cidlist = canddf['CID']

    for i in range(len(cidlist)):
    #for i in range(8679,8680):

        print("%d / %d" %(i+1, len(cidlist)), end="\r")
        cid=cidlist[i]
        lcinfo = lcdf[(lcdf['CID']==cid)]

        # variability parameter
        varPar0, varPar1, varParX = getVAR(lcinfo)

        df = df.append([{'CID': cid,
                         'VAR0_Y0': varPar0[0],
                         'VAR0_Y1': varPar0[1],
                         'VAR0_Y2': varPar0[2],
                         'VAR0_Y3': varPar0[3],
                         'VAR0_Y4': varPar0[4],
                         'VAR0_Y5': varPar0[5],
                         'VAR1_Y0': varPar1[0],
                         'VAR1_Y1': varPar1[1],
                         'VAR1_Y2': varPar1[2],
                         'VAR1_Y3': varPar1[3],
                         'VAR1_Y4': varPar1[4],
                         'VAR1_Y5': varPar1[5],
                         'VARX_Y0': varParX[0],
                         'VARX_Y1': varParX[1],
                         'VARX_Y2': varParX[2],
                         'VARX_Y3': varParX[3],
                         'VARX_Y4': varParX[4],
                         'VARX_Y5': varParX[5]
                        }], ignore_index=True)
        
    return df

In [10]:
#TESTinfodf = getVARinfo(canddf, lcdf)

In [11]:
#TESTinfodf[['CID', 
#            'VAR0_Y1', 'VAR0_Y2', 'VAR0_Y3', 'VAR0_Y4', 'VAR0_Y5',
#            'VAR1_Y1', 'VAR1_Y2', 'VAR1_Y3', 'VAR1_Y4', 'VAR1_Y5',
#            'VARX_Y1', 'VARX_Y2', 'VARX_Y3', 'VARX_Y4', 'VARX_Y5']]

## Bazin fits (under development)

In [111]:
def bazin(t, A, B, t0, tfall, trise):
    return A * np.exp(-(t-t0)/tfall) / (1.0 + np.exp((t-t0)/trise)) + B

In [112]:
def getBAZIN(lcdata):
    
    # Perform Bazin fit
    # Input:
    #   lcdata = light curve dataframe
    # Output:
    #   var    = 
    

    filters = ["g", "r", "i", "z"]
    
    bazinA = []

    for y in range(len(mjdrange)):
            
        # all positive flux errors in a given year
        lc    = lcdata[(lcdata['MJD'] > mjdrange['lo'][y])
                     & (lcdata['MJD'] < mjdrange['hi'][y])
                     & (lcdata['FLUXERR'] > 0.0)]
        
        for f in range(len(filters)):

            xd = lc[lc['BAND']==filters[f]]['MJD']
            xd = xd - mjdrange['lo'][y]
            yd = lc[lc['BAND']==filters[f]]['FLUX']
            e  = lc[lc['BAND']==filters[f]]['FLUXERR']
            
            print(xd)
            print(yd)
            print(e)
            #t0guessidx = np.argmax(yd)
            #t0guess    = xd[t0guessidx]
        
            if len(yd) > 3:
                popt, pcov = curve_fit(bazin, xd, yd, sigma=e, p0=(100., 0., 30., 30., 10.))
        
                # S/N of linear, quadratic, and cubic coefficients
                #snr1 = np.absolute(popt[1]/(pcov[1,1]**0.5))
                #snr2 = np.absolute(popt[2]/(pcov[2,2]**0.5))
                #snr3 = np.absolute(popt[3]/(pcov[3,3]**0.5))
            else:
                snr1 = 0.0
                snr2 = 0.0
                snr3 = 0.0
                
                
        #cum1SNR = np.sqrt(cum1SNR2)

        #varPar1.append(cum1SNR)

    return bazinA

In [113]:
def getBAZINinfo(canddf, lcdf):

    dt = np.dtype([
        ('CID', np.int64),
        ('VAR1_Y0', np.float),
        ('VAR1_Y1', np.float),
        ('VAR1_Y2', np.float),
        ('VAR1_Y3', np.float),
        ('VAR1_Y4', np.float),
        ('VAR1_Y5', np.float)
    ])
    d = np.empty(0, dtype=dt)
    df = pd.DataFrame(d)

    cidlist = canddf['CID']

    #for i in range(len(cidlist)):
    for i in range(0,1):

        print("%d / %d" %(i+1, len(cidlist)), end="\r")
        cid=cidlist[i]
        lcinfo = lcdf[(lcdf['CID']==cid)]

        # variability parameter
        bazinA = getBAZIN(lcinfo)

        df = df.append([{'CID': cid,
                         'VAR1_Y0': varPar1[0],
                         'VAR1_Y1': varPar1[1],
                         'VAR1_Y2': varPar1[2],
                         'VAR1_Y3': varPar1[3],
                         'VAR1_Y4': varPar1[4],
                         'VAR1_Y5': varPar1[5]
                        }], ignore_index=True)
        
    return df

In [114]:
#TESTinfodf = getBAZINinfo(canddf, lcdf)

## Peak magnitudes

In [12]:
def getMinMag(lcdata):

    filters = ["g", "r", "i", "z"]

    minmags = []

    for f in range(len(filters)):

        lc = lcdata[(lcdata['BAND'] == filters[f])
                    & (lcdata['FLUX'] > 0.0)]
        nlc = len(lc)

        if (nlc > 0):
            maxflux = np.nanmax(lc['FLUX'])
            if (maxflux > 0.0):
                minmags.append(27.5 - 2.5*np.log10(maxflux))
            else:
                minmags.append(-99.9999)
        else:
            minmags.append(-99.9999)
            
    return minmags

In [13]:
def getMinMags(canddf, lcdf):

    dt = np.dtype([
        ('CID', np.int64),
        ('minmag_g', np.float), 
        ('minmag_r', np.float), 
        ('minmag_i', np.float), 
        ('minmag_z', np.float)
    ])
    d = np.empty(0, dtype=dt)
    df = pd.DataFrame(d)
    
    cidlist = canddf['CID']

    for i in range(len(cidlist)):
    #for i in range(0,3):

        print("%d / %d" %(i+1, len(cidlist)), end="\r")
        cid=cidlist[i]
        lcinfo = lcdf[(lcdf['CID']==cid)]

        minmags = getMinMag(lcinfo)
    
        df = df.append([{'CID': int(cid),
                         'minmag_g': minmags[0],
                         'minmag_r': minmags[1],
                         'minmag_i': minmags[2],
                         'minmag_z': minmags[3]
                        }], ignore_index=True)

    return df

## Peak magnitudes and MJD in griz

In [14]:
def getPeak(lcdata, year):

    filters = ["g", "r", "i", "z"]

    peakmags = []
    peakmjds = []
    

    for f in range(len(filters)):
            
        if (year >= 0):

            lc = lcdata[(lcdata['BAND'] == filters[f])
                        & (lcdata['FLUXERR'] > 0.0)
                        & (lcdata['MJD'] > mjdrange['lo'][year])
                        & (lcdata['MJD'] < mjdrange['hi'][year])]
            nlc = len(lc)

            if (nlc > 0):
                argmax   = lc['FLUX'].idxmax()
                maxflux = lc['FLUX'][argmax]
                maxmjd  = lc['MJD'][argmax]
                #print(argmax, maxflux, maxmjd)
                
                if (maxflux > 0.0):
                    peakmags.append(27.5 - 2.5*np.log10(maxflux))
                    peakmjds.append(maxmjd)
                else:
                    peakmags.append(-99.9999)
                    peakmjds.append(-99.9999)
            else:
                peakmags.append(-99.9999)
                peakmjds.append(-99.9999)
        else:
            peakmags.append(-99.9999)
            peakmjds.append(-99.9999)
            
    return peakmags, peakmjds

In [15]:
def getPeakInfo(canddf, lcdf):

    dt = np.dtype([
        ('CID', np.int64),
        ('peakmag_g', np.float), 
        ('peakmag_r', np.float), 
        ('peakmag_i', np.float), 
        ('peakmag_z', np.float),
        ('peakmjd_g', np.float), 
        ('peakmjd_r', np.float), 
        ('peakmjd_i', np.float), 
        ('peakmjd_z', np.float)
    ])
    d = np.empty(0, dtype=dt)
    df = pd.DataFrame(d)
    
    cidlist  = canddf['CID']
    yearlist = canddf['yearSNR3']

    for i in range(len(cidlist)):
    #for i in range(0,3):

        print("%d / %d" %(i+1, len(cidlist)), end="\r")
        cid  = cidlist[i]
        year = yearlist[i]
        lcinfo = lcdf[(lcdf['CID']==cid)]

        peakmags, peakmjds = getPeak(lcinfo, year)
    
        df = df.append([{'CID': int(cid),
                         'peakmag_g': peakmags[0],
                         'peakmag_r': peakmags[1],
                         'peakmag_i': peakmags[2],
                         'peakmag_z': peakmags[3],
                         'peakmjd_g': peakmjds[0],
                         'peakmjd_r': peakmjds[1],
                         'peakmjd_i': peakmjds[2],
                         'peakmjd_z': peakmjds[3]
                        }], ignore_index=True)

    return df

## Number of pre-explosion, non-detection measurements in griz

In [16]:
def getNonDet(lcdata, year):

    filters = ["g", "r", "i", "z"]

    nnondet = []

    snrthres = 5.0
        
    for f in range(len(filters)):
            
        if (year >= 0):

            lc = lcdata[(lcdata['BAND'] == filters[f])
                        & (lcdata['FLUXERR'] > 0.0)
                        & (lcdata['MJD'] > mjdrange['lo'][year])
                        & (lcdata['MJD'] < mjdrange['hi'][year])]
            lc = lc.reset_index()
            nlc = len(lc)

#            print(lc['FLUX'])
#            print(lc['FLUXERR'])

            if (nlc > 0):
            
                ncount = 0
                b = 0
                while (b < nlc-1) & (lc['FLUX'][b]/lc['FLUXERR'][b] < snrthres):
                    ncount += 1
                    b += 1

                nnondet.append(ncount)
            else:
                nnondet.append(0)

        else:
            nnondet.append(0)
            
    return nnondet

In [17]:
def getNnonDet(canddf, lcdf):

    dt = np.dtype([
        ('CID', np.int64),
        ('nnondet_g', np.int64), 
        ('nnondet_r', np.int64), 
        ('nnondet_i', np.int64), 
        ('nnondet_z', np.int64)
    ])
    d = np.empty(0, dtype=dt)
    df = pd.DataFrame(d)
    
    cidlist  = canddf['CID']
    yearlist = canddf['yearSNR3']

    for i in range(len(cidlist)):
    #for i in range(0,3):

        print("%d / %d" %(i+1, len(cidlist)), end="\r")
        cid  = cidlist[i]
        year = yearlist[i]
        lcinfo = lcdf[(lcdf['CID']==cid)]

        nnondet = getNonDet(lcinfo, year)
    
        df = df.append([{'CID': int(cid),
                         'nnondet_g': nnondet[0],
                         'nnondet_r': nnondet[1],
                         'nnondet_i': nnondet[2],
                         'nnondet_z': nnondet[3]
                        }], ignore_index=True)

    return df

## First detection day measured from August 1

In [18]:
def getFirstDetDay(lcdata, year):

    filters = ["g", "r", "i", "z"]

    firstdetday = []
    
    snrthres = 5.0
        
    for f in range(len(filters)):
            
        if (year >= 0):

            lc = lcdata[(lcdata['BAND'] == filters[f])
                        & (lcdata['FLUXERR'] > 0.0)
                        & (lcdata['MJD'] > mjdrange['lo'][year])
                        & (lcdata['MJD'] < mjdrange['hi'][year])]
            lc = lc.reset_index()
            nlc = len(lc)

#            print(lc['FLUX'])
#            print(lc['FLUXERR'])

            if (nlc > 0):
            
                found   = 0
                thismjd = 0.0
                for b in range(len(lc)):
                    if (found == 0) & (lc['FLUX'][b]/lc['FLUXERR'][b] > snrthres):
                        thismjd = lc['MJD'][b] - mjdrange['lo'][year]
                        found = 1

                firstdetday.append(thismjd)
            else:
                firstdetday.append(0)

        else:
            firstdetday.append(0)
            
    return firstdetday

In [19]:
def getFirstDetDays(canddf, lcdf):

    dt = np.dtype([
        ('CID', np.int64),
        ('firstdetday_g', np.float), 
        ('firstdetday_r', np.float), 
        ('firstdetday_i', np.float), 
        ('firstdetday_z', np.float)
    ])
    d = np.empty(0, dtype=dt)
    df = pd.DataFrame(d)
    
    cidlist  = canddf['CID']
    yearlist = canddf['yearSNR3']

    for i in range(len(cidlist)):
    #for i in range(0,10):

        print("%d / %d" %(i+1, len(cidlist)), end="\r")
        cid  = cidlist[i]
        year = yearlist[i]
        lcinfo = lcdf[(lcdf['CID']==cid)]

        firstdetdays = getFirstDetDay(lcinfo, year)
    
        df = df.append([{'CID': int(cid),
                         'firstdetday_g': firstdetdays[0],
                         'firstdetday_r': firstdetdays[1],
                         'firstdetday_i': firstdetdays[2],
                         'firstdetday_z': firstdetdays[3]
                        }], ignore_index=True)

    return df

# OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
# Main
# OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO

In [21]:
#candfile = BASEDIR + 'DESALL_forcePhoto_real_snana_fits/FITOPT000.FITRES'
#lcfile   = BASEDIR + 'DESALL_forcePhoto_real_snana_fits/FITOPT000.LCPLOT'
candfile = BASEDIR + 'DES_ALL/DESALL_forcePhoto_real_snana_fits/FITOPT000.FITRES'
lcfile   = BASEDIR + 'DES_ALL/DESALL_forcePhoto_real_snana_fits/FITOPT000.LCPLOT'

In [22]:
canddf = pd.read_csv(candfile, comment='#', delim_whitespace=True)
'''
canddf = canddf.drop(columns=['VARNAMES:', 'IDSURVEY', 'CUTFLAG_SNANA',
       'zHEL', 'zHELERR', 'zCMB', 'zCMBERR', 'VPEC', 'VPECERR',
       'HOST_sSFR', 'HOST_sSFR_ERR', 'ITYPE_BEST', 'SIM_ITYPE', 'Z_Ia',
       'SHAPEPAR_Ia', 'COLORPAR_Ia', 'COLORLAW_Ia', 'TMAX_Ia', 'DMU_Ia',
       'TOBSMIN_Ia', 'TOBSMAX_Ia', 'CHI2_Ia', 'NPT_Ia', 'FITPROB_Ia',
       'PBAYES_Ia', 'LCQ_Ia', 'Ze_Ia', 'SHAPEPARe_Ia', 'COLORPARe_Ia',
       'COLORLAWe_Ia', 'TMAXe_Ia', 'DMUe_Ia', 'PEAKMAG_r_Ia',
       'PEAKMAG_i_Ia', 'Z_MODEL1', 'NONIA_INDEX_MODEL1',
       'COLORPAR_MODEL1', 'COLORLAW_MODEL1', 'TMAX_MODEL1', 'DMU_MODEL1',
       'TOBSMIN_MODEL1', 'TOBSMAX_MODEL1', 'CHI2_MODEL1', 'NPT_MODEL1',
       'FITPROB_MODEL1', 'PBAYES_MODEL1', 'LCQ_MODEL1',
       'PEAKMAG_r_MODEL1', 'PEAKMAG_i_MODEL1', 'Z_MODEL2',
       'NONIA_INDEX_MODEL2', 'COLORPAR_MODEL2', 'COLORLAW_MODEL2',
       'TMAX_MODEL2', 'DMU_MODEL2', 'TOBSMIN_MODEL2', 'TOBSMAX_MODEL2',
       'CHI2_MODEL2', 'NPT_MODEL2', 'FITPROB_MODEL2', 'PBAYES_MODEL2',
       'LCQ_MODEL2', 'PEAKMAG_r_MODEL2', 'PEAKMAG_i_MODEL2', 'Z_MODEL3',
       'NONIA_INDEX_MODEL3', 'COLORPAR_MODEL3', 'COLORLAW_MODEL3',
       'TMAX_MODEL3', 'DMU_MODEL3', 'TOBSMIN_MODEL3', 'TOBSMAX_MODEL3',
       'CHI2_MODEL3', 'NPT_MODEL3', 'FITPROB_MODEL3', 'PBAYES_MODEL3',
       'LCQ_MODEL3', 'PEAKMAG_r_MODEL3', 'PEAKMAG_i_MODEL3', 'Z_MODEL4',
       'NONIA_INDEX_MODEL4', 'COLORPAR_MODEL4', 'COLORLAW_MODEL4',
       'TMAX_MODEL4', 'DMU_MODEL4', 'TOBSMIN_MODEL4', 'TOBSMAX_MODEL4',
       'CHI2_MODEL4', 'NPT_MODEL4', 'FITPROB_MODEL4', 'PBAYES_MODEL4',
       'LCQ_MODEL4', 'PEAKMAG_r_MODEL4', 'PEAKMAG_i_MODEL4'], axis=1)
       '''
canddf = canddf.drop(columns=['VARNAMES:', 'IDSURVEY', 'CUTFLAG_SNANA',
       'zHEL', 'zHELERR', 'zCMB', 'zCMBERR', 'VPEC', 'VPECERR',
       'HOST_sSFR', 'HOST_sSFR_ERR', 'ITYPE_BEST', 'SIM_ITYPE', 'Z_Ia',
       'SHAPEPAR_Ia', 'COLORPAR_Ia', 'COLORLAW_Ia', 'TMAX_Ia', 'DMU_Ia',
       'TOBSMIN_Ia', 'TOBSMAX_Ia', 'CHI2_Ia', 'NPT_Ia', 'FITPROB_Ia',
       'PBAYES_Ia', 'LCQ_Ia', 'Ze_Ia', 'SHAPEPARe_Ia', 'COLORPARe_Ia',
       'COLORLAWe_Ia', 'TMAXe_Ia', 'DMUe_Ia', 'PEAKMAG_r_Ia', 'PEAKMAG_i_Ia',
       'Z_Ibc', 'NONIA_INDEX_Ibc', 'COLORPAR_Ibc', 'COLORLAW_Ibc', 'TMAX_Ibc',
       'DMU_Ibc', 'TOBSMIN_Ibc', 'TOBSMAX_Ibc', 'CHI2_Ibc', 'NPT_Ibc',
       'FITPROB_Ibc', 'PBAYES_Ibc', 'LCQ_Ibc', 'PEAKMAG_r_Ibc',
       'PEAKMAG_i_Ibc', 'Z_II', 'NONIA_INDEX_II', 'COLORPAR_II', 'COLORLAW_II',
       'TMAX_II', 'DMU_II', 'TOBSMIN_II', 'TOBSMAX_II', 'CHI2_II', 'NPT_II',
       'FITPROB_II', 'PBAYES_II', 'LCQ_II', 'PEAKMAG_r_II', 'PEAKMAG_i_II'],
       axis=1)

In [23]:
canddf.columns

Index(['CID', 'TYPE', 'FIELD', 'zHD', 'zHDERR', 'MWEBV', 'HOST_NMATCH',
       'HOST_NMATCH2', 'HOST_OBJID', 'HOST_ZPHOT', 'HOST_ZPHOTERR',
       'HOST_ZSPEC', 'HOST_ZSPECERR', 'HOST_RA', 'HOST_DEC', 'HOST_ANGSEP',
       'HOST_DDLR', 'HOST_CONFUSION', 'HOST_LOGMASS', 'HOST_LOGMASS_ERR',
       'SNRMAX1', 'SNRMAX2', 'SNRMAX3'],
      dtype='object')

In [24]:
fname = BASEDIR + 'CIDcoord.csv'
cidcoorddf = pd.read_csv(fname)

In [25]:
cidcoorddf

Unnamed: 0,CID,TRANSIENT_NAME,RA,DEC
0,1246273,DES13C1bgrw,54.566567,-27.994892
1,1246274,DES13C1amtw,54.642803,-28.040766
2,1246275,DES13C1e,54.647026,-26.401205
3,1246276,DES13C1axel,54.897595,-26.387381
4,1246278,DES13C1wg,53.438839,-27.017366
...,...,...,...,...
31631,1997029,,53.535008,-29.469399
31632,1997033,DES17C2jpr,54.307945,-28.739864
31633,1998019,DES17C2jpq,53.606541,-28.708450
31634,1999022,,53.430145,-29.181150


In [26]:
canddf = pd.merge(canddf, cidcoorddf, on='CID', how='left')

In [27]:
canddf

Unnamed: 0,CID,TYPE,FIELD,zHD,zHDERR,MWEBV,HOST_NMATCH,HOST_NMATCH2,HOST_OBJID,HOST_ZPHOT,...,HOST_DDLR,HOST_CONFUSION,HOST_LOGMASS,HOST_LOGMASS_ERR,SNRMAX1,SNRMAX2,SNRMAX3,TRANSIENT_NAME,RA,DEC
0,1246273,0,C1,2.3522,0.0010,0.010097,1,1,590,1.43312,...,0.055,-3.804,11.37,0.05,22.12803,15.77272,13.33338,DES13C1bgrw,54.566567,-27.994892
1,1246344,0,C1,-9.0000,-9.0000,0.012869,1,2,337,1.02155,...,0.305,-3.766,-999.00,-999.00,28.58938,19.07793,13.84359,DES13C1fdr,54.132809,-27.500887
2,1246433,0,X3,1.2206,0.0007,0.027303,1,1,19800,0.28819,...,0.417,-3.755,10.17,0.02,34.43603,34.36990,30.19969,DES13X3bhsk,36.360428,-4.051464
3,1246515,0,X3,-9.0000,-9.0000,0.025877,1,1,18292,0.78169,...,1.088,-0.062,-999.00,-999.00,24.49782,21.23731,15.28282,DES13X3pp,36.393536,-4.289726
4,1246618,0,X3,-9.0000,-9.0000,0.026634,1,3,156526,0.46001,...,0.141,1.187,-999.00,-999.00,6.16193,5.89866,2.38890,DES13X3va,37.176445,-4.349217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31631,1965102,0,C1,-9.0000,-9.0000,0.010881,1,1,153121,0.82337,...,2.775,-1.266,-999.00,-999.00,3.63013,2.47477,2.23788,DES17C1jjz,53.843685,-27.083427
31632,1968025,0,C2,-9.0000,-9.0000,0.014389,2,4,-58337,-9.00000,...,0.985,-2.787,-999.00,-999.00,4.28363,2.28496,1.99382,DES17C2jla,55.081097,-29.595484
31633,1970027,0,C2,-9.0000,-9.0000,0.012923,1,1,153398,0.43243,...,0.070,-4.252,-999.00,-999.00,4.72036,3.32816,3.05189,DES17C2jmq,55.143509,-28.889006
31634,1979080,0,C3,-9.0000,-9.0000,0.008402,1,1,153596,0.71025,...,0.929,-1.814,-999.00,-999.00,4.39773,2.32953,2.04287,DES17C3joe,52.163025,-27.576025


In [28]:
lcdf = pd.read_csv(lcfile, comment='#', delim_whitespace=True, header=None,
                  names=['CID', 'MJD', 'PHASE', 'FLUX', 'FLUXERR', 'FLAG', 'BAND', 'CHI2', 'FIT'])
lcdf = lcdf[(lcdf['FLAG'] != 0) & (lcdf['FIT'] == 1)]

In [29]:
lcdf = lcdf.drop(columns=['PHASE', 'FLAG', 'CHI2', 'FIT'], axis=1)

In [30]:
lcdf

Unnamed: 0,CID,MJD,FLUX,FLUXERR,BAND
0,1246273,56534.218,98.082000,7.5310,g
1,1246273,56538.365,107.100000,5.0717,g
2,1246273,56543.310,108.410000,5.2318,g
3,1246273,56547.268,104.530000,5.0952,g
4,1246273,56551.245,68.975000,22.7580,g
...,...,...,...,...,...
88802998,1989022,58117.114,1.710300,6.8404,z
88802999,1989022,58124.181,0.091458,3.2354,z
88803000,1989022,58132.066,2.289100,3.3801,z
88803001,1989022,58138.077,6.778000,5.2872,z


## Count number of detections above SNR = 3 and 5, and flag 1-/2-season transients

In [31]:
# get SNR info from light curves

fname = BASEDIR + "LC_SNRinfo.csv"

if os.path.exists(fname):
    SNRinfodf = pd.read_csv(fname)
    
else:
    SNRinfodf = getSNRinfo(canddf, lcdf)
    SNRinfodf.to_csv(fname, index=False)

In [32]:
SNRinfodf

Unnamed: 0,CID,nSNR3_Y0,nSNR3_Y1,nSNR3_Y2,nSNR3_Y3,nSNR3_Y4,nSNR3_Y5,yearSNR3,flagSNR3,nSNR5_Y0,...,cumSNR_Y2,cumSNR_Y3,cumSNR_Y4,cumSNR_Y5,cumSNRsgn_Y0,cumSNRsgn_Y1,cumSNRsgn_Y2,cumSNRsgn_Y3,cumSNRsgn_Y4,cumSNRsgn_Y5
0,1246273,0,110,0,44,105,43,1,0,0,...,80.124939,34.815408,72.033215,36.469810,0.0,103.658209,-80.124939,34.655768,72.033074,36.429359
1,1246344,0,109,0,40,80,26,1,0,0,...,78.238657,36.952167,58.133895,33.089349,0.0,114.545219,-78.238657,36.932412,58.123882,33.056357
2,1246433,0,42,0,90,6,0,3,0,0,...,33.149739,150.755758,28.508381,50.735715,0.0,32.451597,-33.143314,150.755758,-23.313562,-50.735698
3,1246515,0,41,0,0,0,0,1,1,0,...,48.795150,9.315674,9.186923,8.517339,0.0,80.532464,-48.794613,-5.099551,-3.803036,-3.587029
4,1246618,0,7,0,0,0,0,1,1,0,...,8.436354,8.216062,8.519579,8.778539,0.0,13.440899,-6.711349,-1.754571,-2.380446,-3.702862
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31631,1965102,0,1,0,0,0,2,5,0,0,...,8.922275,11.249796,12.563320,11.489596,0.0,5.454267,-4.755483,1.830699,-1.660333,-0.572943
31632,1968025,0,0,0,0,0,3,5,0,0,...,8.142095,8.822178,9.100574,11.956780,0.0,-6.576033,5.252307,-1.623247,3.197511,8.883983
31633,1970027,0,0,0,0,2,8,5,1,0,...,7.316070,8.204630,12.696610,15.295652,0.0,-9.895510,3.954867,5.397113,11.209533,14.666175
31634,1979080,0,0,0,0,0,2,5,0,0,...,9.123499,8.283238,10.537630,12.212630,0.0,-4.513569,-6.813105,-4.649725,-4.151386,7.100430


In [33]:
SNRinfodf[SNRinfodf['CID']==1252129]

Unnamed: 0,CID,nSNR3_Y0,nSNR3_Y1,nSNR3_Y2,nSNR3_Y3,nSNR3_Y4,nSNR3_Y5,yearSNR3,flagSNR3,nSNR5_Y0,...,cumSNR_Y2,cumSNR_Y3,cumSNR_Y4,cumSNR_Y5,cumSNRsgn_Y0,cumSNRsgn_Y1,cumSNRsgn_Y2,cumSNRsgn_Y3,cumSNRsgn_Y4,cumSNRsgn_Y5
1113,1252129,0,45,0,0,0,0,1,1,0,...,49.226955,5.435004,6.602583,6.493527,0.0,63.225093,-49.226955,0.850405,-3.521022,-3.92922


In [34]:
canddf = pd.merge(canddf, SNRinfodf, on='CID', how='left')

In [35]:
canddf.columns

Index(['CID', 'TYPE', 'FIELD', 'zHD', 'zHDERR', 'MWEBV', 'HOST_NMATCH',
       'HOST_NMATCH2', 'HOST_OBJID', 'HOST_ZPHOT', 'HOST_ZPHOTERR',
       'HOST_ZSPEC', 'HOST_ZSPECERR', 'HOST_RA', 'HOST_DEC', 'HOST_ANGSEP',
       'HOST_DDLR', 'HOST_CONFUSION', 'HOST_LOGMASS', 'HOST_LOGMASS_ERR',
       'SNRMAX1', 'SNRMAX2', 'SNRMAX3', 'TRANSIENT_NAME', 'RA', 'DEC',
       'nSNR3_Y0', 'nSNR3_Y1', 'nSNR3_Y2', 'nSNR3_Y3', 'nSNR3_Y4', 'nSNR3_Y5',
       'yearSNR3', 'flagSNR3', 'nSNR5_Y0', 'nSNR5_Y1', 'nSNR5_Y2', 'nSNR5_Y3',
       'nSNR5_Y4', 'nSNR5_Y5', 'yearSNR5', 'flagSNR5', 'cumSNR_Y0',
       'cumSNR_Y1', 'cumSNR_Y2', 'cumSNR_Y3', 'cumSNR_Y4', 'cumSNR_Y5',
       'cumSNRsgn_Y0', 'cumSNRsgn_Y1', 'cumSNRsgn_Y2', 'cumSNRsgn_Y3',
       'cumSNRsgn_Y4', 'cumSNRsgn_Y5'],
      dtype='object')

## Variability info

In [36]:
# get variability parameter from light curves

fname = BASEDIR + "LC_VARinfo.csv"

if os.path.exists(fname):
    VARinfodf = pd.read_csv(fname)
    
else:
    VARinfodf = getVARinfo(canddf, lcdf)
    VARinfodf.to_csv(fname, index=False)

31636 / 31636

In [37]:
VARinfodf

Unnamed: 0,CID,VAR0_Y0,VAR0_Y1,VAR0_Y2,VAR0_Y3,VAR0_Y4,VAR0_Y5,VAR1_Y0,VAR1_Y1,VAR1_Y2,VAR1_Y3,VAR1_Y4,VAR1_Y5,VARX_Y0,VARX_Y1,VARX_Y2,VARX_Y3,VARX_Y4,VARX_Y5
0,1246273,0.0,84.774499,40.098399,5.831721,39.970556,9.228657,0.0,27.074181,10.250276,11.742476,6.133548,10.965042,0.0,88.992848,41.387797,13.110862,40.438420,14.331792
1,1246344,0.0,53.580928,32.648747,17.797304,5.730371,6.757750,0.0,15.408647,8.855506,3.525090,10.307695,8.632201,0.0,55.752509,33.828401,18.143051,11.793461,10.962759
2,1246433,0.0,23.824748,16.269631,24.284455,13.075300,27.278058,0.0,9.527129,7.596636,9.096997,21.444307,3.309077,0.0,25.659010,17.955773,25.932414,25.116166,27.478036
3,1246515,0.0,31.848837,17.930914,3.192429,2.442704,3.731054,0.0,22.747261,1.353826,1.805746,1.641828,3.473340,0.0,39.138042,17.981950,3.667741,2.943196,5.097535
4,1246618,0.0,8.720522,0.992767,1.107135,1.950598,2.622762,0.0,6.629892,1.902530,0.807166,1.921931,2.585253,0.0,10.954587,2.145974,1.370133,2.738367,3.682718
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31631,1965102,0.0,1.861548,2.632471,2.759326,3.582368,1.844021,0.0,2.480660,1.419547,2.334730,1.901986,3.650859,0.0,3.101457,2.990822,3.614533,4.055973,4.090133
31632,1968025,0.0,1.734359,2.942943,1.557805,1.881208,1.162075,0.0,0.987956,1.181036,1.012188,1.820446,2.108383,0.0,1.996011,3.171082,1.857762,2.617817,2.407425
31633,1970027,0.0,5.227835,4.409689,3.279727,5.449053,5.885318,0.0,2.099326,2.814286,1.527553,2.228679,1.753141,0.0,5.633598,5.231211,3.618014,5.887206,6.140885
31634,1979080,0.0,3.176019,3.311544,2.755776,4.280154,2.615380,0.0,2.234349,3.536216,2.528424,3.466622,2.737238,0.0,3.883222,4.844703,3.739950,5.507920,3.785853


In [38]:
canddf = pd.merge(canddf, VARinfodf, on='CID', how='left')

In [39]:
canddf.columns

Index(['CID', 'TYPE', 'FIELD', 'zHD', 'zHDERR', 'MWEBV', 'HOST_NMATCH',
       'HOST_NMATCH2', 'HOST_OBJID', 'HOST_ZPHOT', 'HOST_ZPHOTERR',
       'HOST_ZSPEC', 'HOST_ZSPECERR', 'HOST_RA', 'HOST_DEC', 'HOST_ANGSEP',
       'HOST_DDLR', 'HOST_CONFUSION', 'HOST_LOGMASS', 'HOST_LOGMASS_ERR',
       'SNRMAX1', 'SNRMAX2', 'SNRMAX3', 'TRANSIENT_NAME', 'RA', 'DEC',
       'nSNR3_Y0', 'nSNR3_Y1', 'nSNR3_Y2', 'nSNR3_Y3', 'nSNR3_Y4', 'nSNR3_Y5',
       'yearSNR3', 'flagSNR3', 'nSNR5_Y0', 'nSNR5_Y1', 'nSNR5_Y2', 'nSNR5_Y3',
       'nSNR5_Y4', 'nSNR5_Y5', 'yearSNR5', 'flagSNR5', 'cumSNR_Y0',
       'cumSNR_Y1', 'cumSNR_Y2', 'cumSNR_Y3', 'cumSNR_Y4', 'cumSNR_Y5',
       'cumSNRsgn_Y0', 'cumSNRsgn_Y1', 'cumSNRsgn_Y2', 'cumSNRsgn_Y3',
       'cumSNRsgn_Y4', 'cumSNRsgn_Y5', 'VAR0_Y0', 'VAR0_Y1', 'VAR0_Y2',
       'VAR0_Y3', 'VAR0_Y4', 'VAR0_Y5', 'VAR1_Y0', 'VAR1_Y1', 'VAR1_Y2',
       'VAR1_Y3', 'VAR1_Y4', 'VAR1_Y5', 'VARX_Y0', 'VARX_Y1', 'VARX_Y2',
       'VARX_Y3', 'VARX_Y4', 'VARX_Y5'],
      dtype

## griz peak mags from full light curves

In [40]:
# get mininum magnitude from full light curves

fname = BASEDIR + "LC_MinMags.csv"

if os.path.exists(fname):
    minmagdf = pd.read_csv(fname)
    
else:
    minmagdf = getMinMags(canddf, lcdf)
    minmagdf.to_csv(fname, index=False)

In [41]:
minmagdf

Unnamed: 0,CID,minmag_g,minmag_r,minmag_i,minmag_z
0,1246273,21.937115,22.511208,22.459611,22.281168
1,1246344,22.095532,22.164398,22.006799,21.732009
2,1246433,22.320203,22.250798,22.223778,22.459820
3,1246515,24.061252,23.084712,23.050991,23.069104
4,1246618,24.204789,23.126780,24.401626,24.045498
...,...,...,...,...,...
31631,1965102,22.724093,23.616688,22.650304,21.856464
31632,1968025,23.668337,23.203787,21.833363,23.433610
31633,1970027,22.740350,23.097700,22.154796,20.973935
31634,1979080,23.924087,24.536853,24.426756,23.667159


In [42]:
canddf = pd.merge(canddf, minmagdf, on='CID', how='left')

In [43]:
canddf.columns

Index(['CID', 'TYPE', 'FIELD', 'zHD', 'zHDERR', 'MWEBV', 'HOST_NMATCH',
       'HOST_NMATCH2', 'HOST_OBJID', 'HOST_ZPHOT', 'HOST_ZPHOTERR',
       'HOST_ZSPEC', 'HOST_ZSPECERR', 'HOST_RA', 'HOST_DEC', 'HOST_ANGSEP',
       'HOST_DDLR', 'HOST_CONFUSION', 'HOST_LOGMASS', 'HOST_LOGMASS_ERR',
       'SNRMAX1', 'SNRMAX2', 'SNRMAX3', 'TRANSIENT_NAME', 'RA', 'DEC',
       'nSNR3_Y0', 'nSNR3_Y1', 'nSNR3_Y2', 'nSNR3_Y3', 'nSNR3_Y4', 'nSNR3_Y5',
       'yearSNR3', 'flagSNR3', 'nSNR5_Y0', 'nSNR5_Y1', 'nSNR5_Y2', 'nSNR5_Y3',
       'nSNR5_Y4', 'nSNR5_Y5', 'yearSNR5', 'flagSNR5', 'cumSNR_Y0',
       'cumSNR_Y1', 'cumSNR_Y2', 'cumSNR_Y3', 'cumSNR_Y4', 'cumSNR_Y5',
       'cumSNRsgn_Y0', 'cumSNRsgn_Y1', 'cumSNRsgn_Y2', 'cumSNRsgn_Y3',
       'cumSNRsgn_Y4', 'cumSNRsgn_Y5', 'VAR0_Y0', 'VAR0_Y1', 'VAR0_Y2',
       'VAR0_Y3', 'VAR0_Y4', 'VAR0_Y5', 'VAR1_Y0', 'VAR1_Y1', 'VAR1_Y2',
       'VAR1_Y3', 'VAR1_Y4', 'VAR1_Y5', 'VARX_Y0', 'VARX_Y1', 'VARX_Y2',
       'VARX_Y3', 'VARX_Y4', 'VARX_Y5', 'minmag_g', 

In [44]:
canddf[canddf['yearSNR3'] < 0][['CID', 'nSNR3_Y0', 'nSNR3_Y1', 'nSNR3_Y2', 'nSNR3_Y3', 'nSNR3_Y4', 'nSNR3_Y5', 'yearSNR3']]

Unnamed: 0,CID,nSNR3_Y0,nSNR3_Y1,nSNR3_Y2,nSNR3_Y3,nSNR3_Y4,nSNR3_Y5,yearSNR3
141,1266734,0,0,0,0,0,0,-1
169,1276336,0,0,0,0,0,0,-1
861,1878161,0,0,0,0,0,0,-1
864,1878529,0,0,0,0,0,0,-1
906,1895078,0,0,0,0,0,0,-1
...,...,...,...,...,...,...,...,...
30898,1305326,0,0,0,0,0,0,-1
31302,1599039,0,0,0,0,0,0,-1
31386,1794030,0,0,0,0,0,0,-1
31530,1906522,0,0,0,0,0,0,-1


## griz peak mags and MJDs from yearSNR3 light curves

In [45]:
# get peak info from yearSNR3 light curves

fname = BASEDIR + "LC_PeakInfo.csv"

if os.path.exists(fname):
    peakinfodf = pd.read_csv(fname)
    
else:
    peakinfodf = getPeakInfo(canddf, lcdf)
    peakinfodf.to_csv(fname, index=False)

In [46]:
peakinfodf

Unnamed: 0,CID,peakmag_g,peakmag_r,peakmag_i,peakmag_z,peakmjd_g,peakmjd_r,peakmjd_i,peakmjd_z
0,1246273,22.221928,22.511208,22.459611,22.281168,56552.271,56551.247,56543.314,56534.225
1,1246344,22.095532,22.164398,22.285704,21.732009,56567.219,56590.343,56590.345,56590.348
2,1246433,22.320203,22.250798,22.223778,22.459820,57345.079,57301.294,57365.081,57317.097
3,1246515,24.061252,23.084712,23.050991,23.069104,56536.207,56538.261,56537.379,56563.339
4,1246618,26.226960,25.099346,24.401626,24.045498,56653.114,56559.299,56537.379,56537.291
...,...,...,...,...,...,...,...,...,...
31631,1965102,22.724093,24.421900,23.039081,24.045859,58026.334,58028.179,58004.375,58139.050
31632,1968025,23.668337,23.302817,21.833363,24.361939,58086.062,58029.164,58026.325,58041.136
31633,1970027,22.740350,23.097700,22.772998,22.089862,58028.164,58143.098,58026.325,58026.330
31634,1979080,24.884933,24.536853,24.426756,23.667159,58154.082,57996.393,57998.249,58026.307


In [47]:
canddf = pd.merge(canddf, peakinfodf, on='CID', how='left')

In [48]:
canddf.columns

Index(['CID', 'TYPE', 'FIELD', 'zHD', 'zHDERR', 'MWEBV', 'HOST_NMATCH',
       'HOST_NMATCH2', 'HOST_OBJID', 'HOST_ZPHOT', 'HOST_ZPHOTERR',
       'HOST_ZSPEC', 'HOST_ZSPECERR', 'HOST_RA', 'HOST_DEC', 'HOST_ANGSEP',
       'HOST_DDLR', 'HOST_CONFUSION', 'HOST_LOGMASS', 'HOST_LOGMASS_ERR',
       'SNRMAX1', 'SNRMAX2', 'SNRMAX3', 'TRANSIENT_NAME', 'RA', 'DEC',
       'nSNR3_Y0', 'nSNR3_Y1', 'nSNR3_Y2', 'nSNR3_Y3', 'nSNR3_Y4', 'nSNR3_Y5',
       'yearSNR3', 'flagSNR3', 'nSNR5_Y0', 'nSNR5_Y1', 'nSNR5_Y2', 'nSNR5_Y3',
       'nSNR5_Y4', 'nSNR5_Y5', 'yearSNR5', 'flagSNR5', 'cumSNR_Y0',
       'cumSNR_Y1', 'cumSNR_Y2', 'cumSNR_Y3', 'cumSNR_Y4', 'cumSNR_Y5',
       'cumSNRsgn_Y0', 'cumSNRsgn_Y1', 'cumSNRsgn_Y2', 'cumSNRsgn_Y3',
       'cumSNRsgn_Y4', 'cumSNRsgn_Y5', 'VAR0_Y0', 'VAR0_Y1', 'VAR0_Y2',
       'VAR0_Y3', 'VAR0_Y4', 'VAR0_Y5', 'VAR1_Y0', 'VAR1_Y1', 'VAR1_Y2',
       'VAR1_Y3', 'VAR1_Y4', 'VAR1_Y5', 'VARX_Y0', 'VARX_Y1', 'VARX_Y2',
       'VARX_Y3', 'VARX_Y4', 'VARX_Y5', 'minmag_g', 

## griz number of non-detection points from yearSNR3 light curves

In [49]:
# get number of non-detection points from yearSNR3 light curves

fname = BASEDIR + "LC_NnonDet.csv"

if os.path.exists(fname):
    nnondetdf = pd.read_csv(fname)
    
else:
    nnondetdf = getNnonDet(canddf, lcdf)
    nnondetdf.to_csv(fname, index=False)

In [50]:
nnondetdf

Unnamed: 0,CID,nnondet_g,nnondet_r,nnondet_i,nnondet_z
0,1246273,0,0,0,0
1,1246344,0,0,0,0
2,1246433,0,0,0,0
3,1246515,0,0,0,0
4,1246618,20,18,0,0
...,...,...,...,...,...
31631,1965102,31,31,31,31
31632,1968025,30,29,30,29
31633,1970027,30,28,30,16
31634,1979080,27,28,27,25


In [51]:
canddf = pd.merge(canddf, nnondetdf, on='CID', how='left')

In [52]:
canddf.columns

Index(['CID', 'TYPE', 'FIELD', 'zHD', 'zHDERR', 'MWEBV', 'HOST_NMATCH',
       'HOST_NMATCH2', 'HOST_OBJID', 'HOST_ZPHOT', 'HOST_ZPHOTERR',
       'HOST_ZSPEC', 'HOST_ZSPECERR', 'HOST_RA', 'HOST_DEC', 'HOST_ANGSEP',
       'HOST_DDLR', 'HOST_CONFUSION', 'HOST_LOGMASS', 'HOST_LOGMASS_ERR',
       'SNRMAX1', 'SNRMAX2', 'SNRMAX3', 'TRANSIENT_NAME', 'RA', 'DEC',
       'nSNR3_Y0', 'nSNR3_Y1', 'nSNR3_Y2', 'nSNR3_Y3', 'nSNR3_Y4', 'nSNR3_Y5',
       'yearSNR3', 'flagSNR3', 'nSNR5_Y0', 'nSNR5_Y1', 'nSNR5_Y2', 'nSNR5_Y3',
       'nSNR5_Y4', 'nSNR5_Y5', 'yearSNR5', 'flagSNR5', 'cumSNR_Y0',
       'cumSNR_Y1', 'cumSNR_Y2', 'cumSNR_Y3', 'cumSNR_Y4', 'cumSNR_Y5',
       'cumSNRsgn_Y0', 'cumSNRsgn_Y1', 'cumSNRsgn_Y2', 'cumSNRsgn_Y3',
       'cumSNRsgn_Y4', 'cumSNRsgn_Y5', 'VAR0_Y0', 'VAR0_Y1', 'VAR0_Y2',
       'VAR0_Y3', 'VAR0_Y4', 'VAR0_Y5', 'VAR1_Y0', 'VAR1_Y1', 'VAR1_Y2',
       'VAR1_Y3', 'VAR1_Y4', 'VAR1_Y5', 'VARX_Y0', 'VARX_Y1', 'VARX_Y2',
       'VARX_Y3', 'VARX_Y4', 'VARX_Y5', 'minmag_g', 

## First detection day measured from August 1

In [53]:
# get first detection day measured from August 1 from yearSNR3 light curves

fname = BASEDIR + "LC_FirstDetDays.csv"

if os.path.exists(fname):
    firstdetdaydf = pd.read_csv(fname)
    
else:
    firstdetdaydf = getFirstDetDays(canddf, lcdf)
    firstdetdaydf.to_csv(fname, index=False)

In [54]:
firstdetdaydf

Unnamed: 0,CID,firstdetday_g,firstdetday_r,firstdetday_i,firstdetday_z
0,1246273,29.218,29.221,29.223,29.225
1,1246344,29.218,29.221,29.223,29.225
2,1246433,14.293,14.302,14.318,14.341
3,1246515,31.207,33.261,32.379,32.291
4,1246618,0.000,0.000,32.379,32.291
...,...,...,...,...,...
31631,1965102,0.000,0.000,0.000,0.000
31632,1968025,0.000,0.000,0.000,0.000
31633,1970027,0.000,0.000,0.000,0.000
31634,1979080,0.000,0.000,0.000,0.000


In [55]:
canddf = pd.merge(canddf, firstdetdaydf, on='CID', how='left')

In [56]:
canddf

Unnamed: 0,CID,TYPE,FIELD,zHD,zHDERR,MWEBV,HOST_NMATCH,HOST_NMATCH2,HOST_OBJID,HOST_ZPHOT,...,peakmjd_i,peakmjd_z,nnondet_g,nnondet_r,nnondet_i,nnondet_z,firstdetday_g,firstdetday_r,firstdetday_i,firstdetday_z
0,1246273,0,C1,2.3522,0.0010,0.010097,1,1,590,1.43312,...,56543.314,56534.225,0,0,0,0,29.218,29.221,29.223,29.225
1,1246344,0,C1,-9.0000,-9.0000,0.012869,1,2,337,1.02155,...,56590.345,56590.348,0,0,0,0,29.218,29.221,29.223,29.225
2,1246433,0,X3,1.2206,0.0007,0.027303,1,1,19800,0.28819,...,57365.081,57317.097,0,0,0,0,14.293,14.302,14.318,14.341
3,1246515,0,X3,-9.0000,-9.0000,0.025877,1,1,18292,0.78169,...,56537.379,56563.339,0,0,0,0,31.207,33.261,32.379,32.291
4,1246618,0,X3,-9.0000,-9.0000,0.026634,1,3,156526,0.46001,...,56537.379,56537.291,20,18,0,0,0.000,0.000,32.379,32.291
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31631,1965102,0,C1,-9.0000,-9.0000,0.010881,1,1,153121,0.82337,...,58004.375,58139.050,31,31,31,31,0.000,0.000,0.000,0.000
31632,1968025,0,C2,-9.0000,-9.0000,0.014389,2,4,-58337,-9.00000,...,58026.325,58041.136,30,29,30,29,0.000,0.000,0.000,0.000
31633,1970027,0,C2,-9.0000,-9.0000,0.012923,1,1,153398,0.43243,...,58026.325,58026.330,30,28,30,16,0.000,0.000,0.000,0.000
31634,1979080,0,C3,-9.0000,-9.0000,0.008402,1,1,153596,0.71025,...,57998.249,58026.307,27,28,27,25,0.000,0.000,0.000,0.000


In [57]:
# Output all of this to a csv file

fname = BASEDIR + "LC_MergedOutput.csv"

if os.path.exists(fname):
    os.remove(fname)
    
canddf.to_csv(fname, index=False)