In [28]:
#import necessary packages

import pandas as pd
import numpy as np
import random as rn
from scipy.optimize import curve_fit

In [69]:
#LOAD THE TRAINING LIGHT CURVES AND THEIR METADATA, THEN KEEP ONLY THE SUPERNOVAE
#THIS SUBSET IS USED WHILE THE ALGORITHMS ARE BEING DEVELOPED; WIDEN IT ONCE
#CLASSIFICATION CAN BE PERFORMED

#targets 42 and 90 are the two supernova classes

#light curves, ordered by object and then by passband
train_series = (
    pd.read_csv('training_set.csv')
    .sort_values(by=['object_id', 'passband'], ascending=[True, True])
)

train_metadata = pd.read_csv('training_set_metadata.csv')

#metadata rows belonging to each supernova class
supernovae42_meta = train_metadata[train_metadata['target'].eq(42)]
supernovae90_meta = train_metadata[train_metadata['target'].eq(90)]

#distinct object_id values of each class, in order of first appearance
ids_42 = supernovae42_meta['object_id'].drop_duplicates().to_numpy()
ids_90 = supernovae90_meta['object_id'].drop_duplicates().to_numpy()

#light-curve rows belonging to each supernova class
supernovae42_ts = train_series.loc[lambda frame: frame['object_id'].isin(ids_42)]
supernovae90_ts = train_series.loc[lambda frame: frame['object_id'].isin(ids_90)]

print(ids_42)
print(ids_90)

supernovae42_ts.head()

[      730      1632      2103 ... 130319749 130552230 130659834]
[      745      1124      1598 ... 130375489 130414189 130755807]


Unnamed: 0,object_id,mjd,passband,flux,flux_err,detected
712,730,59818.274,0,-2.3422,1.801066,0
713,730,59819.2541,0,3.380978,2.4696,0
714,730,59820.2522,0,-2.230815,1.915426,0
715,730,59821.2478,0,1.159034,2.461736,0
716,730,59822.2433,0,5.942166,2.90158,0


<b>Parametric fitting for Type Ia supernovae</b>
<br><br>
Below I have implemented a simple parametric fit that is applied to a potential supernova event using least-squares optimization. These methods will be important as I begin to develop the second layer of classification.

In [None]:
#implement the bazin parametric fit
#simple model with no physical basis but an ability to reproduce the behavior of most light curves
#pre-processing for supernova classification

def bazin(t, A, t0, trise, tfall):
    """Evaluate the Bazin light-curve model at the times ``t``.

    The model is ``A * exp(-(t - t0) / tfall) / (1 + exp(-(t - t0) / trise))``.

    Parameters
    ----------
    t : scalar or array-like
        Times at which to evaluate the model; converted to a float ndarray.
    A : float
        Amplitude factor.
    t0 : float
        Reference time subtracted from ``t``.
    trise, tfall : float
        Rise and fall time scales.

    Returns
    -------
    numpy.ndarray
        Model values with the same shape as ``t``. All zeros when
        ``tfall <= trise``.
    """
    t = np.asarray(t, dtype=float)
    if tfall <= trise:
        return np.zeros(t.shape)
    dt = t - t0
    #evaluate the ratio in log-space: log(1 + exp(z)) == logaddexp(0, z).
    #the direct form overflows in both exponentials for times far before t0
    #and then yields inf / inf = NaN instead of a value that tends to zero
    return A * np.exp(-dt / tfall - np.logaddexp(0.0, -dt / trise))

In [83]:
#recover parameters for a given object
#model by zheng, kelly, and filippenko: specific to Ia SN
def get_zkffit(timeseries, metadata, obj_id):
    """Prepare one object's light curve for a per-band parametric fit.

    No fit is performed here: the function filters the object's observations,
    shifts the time axis of every passband so that its first observation is at
    ``mjd == 0``, and returns the passband-0 rows.

    Parameters
    ----------
    timeseries : pandas.DataFrame
        Observations; the columns ``object_id``, ``passband`` and ``mjd``
        are read.
    metadata : pandas.DataFrame
        Object metadata; the columns ``object_id`` and ``hostgal_photoz``
        are read.
    obj_id : int
        Identifier of the object to process.

    Returns
    -------
    pandas.DataFrame
        The object's passband-0 rows with ``mjd`` rebased to start at 0.
        If the mean ``hostgal_photoz`` of the object is exactly 0, a one-row
        frame of NaN fit parameters is returned instead.
    """
    bands = [0, 1, 2, 3, 4, 5]

    #one-row frame of fit parameters, all NaN; returned when the object is rejected
    #(np.nan is used because the np.NaN alias no longer exists in NumPy 2.0)
    columns = {'object_id': [obj_id], 'bazin_trise': [np.nan]}
    for band in bands:
        columns['bazin_tfall_{}'.format(band)] = [np.nan]
        columns['bazin_A_{}'.format(band)] = [np.nan]
    res = pd.DataFrame(columns)

    #rows that belong to the requested object
    obj_ts = timeseries[timeseries['object_id'] == obj_id]
    obj_meta = metadata[metadata['object_id'] == obj_id]
    #applying naive benchmark that supernovae are extragalactic
    if (obj_meta['hostgal_photoz']).mean() == 0:
        print('This object is not extragalactic!')
        return res

    #the fit is sensitive to time given in mjd, so every passband is
    #rebased to start at t=0; assign() builds new frames and leaves the
    #caller's data untouched
    band_ts = {}
    for band in bands:
        single_ts = obj_ts[obj_ts['passband'] == band]
        band_ts[band] = single_ts.assign(mjd=single_ts['mjd'] - single_ts['mjd'].min())

    #only the passband-0 series is returned for now
    return band_ts[0]

In [84]:
#preview the rebased passband-0 light curve of one class-42 object
#(get_bazin is only defined in a later cell, so calling it here fails on a fresh kernel)
get_zkffit(supernovae42_ts, supernovae42_meta, 130659834)

Unnamed: 0,object_id,mjd,passband,flux,flux_err,detected
1420175,130659834,0.0,0,2.89216,8.843315,0
1420195,130659834,323.1727,0,2.753667,10.756069,0
1420206,130659834,352.0313,0,-8.251876,10.655227,0
1420207,130659834,354.0355,0,-0.800705,7.725579,0
1420215,130659834,381.957,0,-7.528365,7.241809,0
1420223,130659834,434.9019,0,9.487567,7.582758,0
1420251,130659834,737.0956,0,-3.156383,7.291931,0
1420261,130659834,790.9611,0,11.842884,11.391657,0
1420266,130659834,821.9179,0,-1.24797,11.74006,0
1420267,130659834,822.8907,0,5.501731,9.815599,0


In [None]:
#recover bazin parameters for a given object
def get_bazin(timeseries, metadata, obj_id):
    """Fit the Bazin model to each passband of one object in two stages.

    Stage one fits all four Bazin parameters (A, t0, trise, tfall)
    independently in every passband. Stage two fixes t0 and trise to the
    medians of the stage-one estimates and refits only A and tfall per
    passband.

    Parameters
    ----------
    timeseries : pandas.DataFrame
        Observations; the columns ``object_id``, ``passband``, ``mjd``,
        ``flux`` and ``flux_err`` are read.
    metadata : pandas.DataFrame
        Object metadata; the columns ``object_id`` and ``hostgal_photoz``
        are read.
    obj_id : int
        Identifier of the object to fit.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``object_id``, ``bazin_trise`` and, for each
        passband 0-5, ``bazin_tfall_<band>`` and ``bazin_A_<band>``.
        Parameters that could not be estimated stay NaN; this includes the
        whole row when the object is rejected or the fit fails.
    """
    bands = [0, 1, 2, 3, 4, 5]

    #one-row result frame; every fit parameter starts as NaN
    #(np.nan is used because the np.NaN alias no longer exists in NumPy 2.0)
    columns = {'object_id': [obj_id], 'bazin_trise': [np.nan]}
    for band in bands:
        columns['bazin_tfall_{}'.format(band)] = [np.nan]
        columns['bazin_A_{}'.format(band)] = [np.nan]
    res = pd.DataFrame(columns)

    #rows that belong to the requested object
    obj_ts = timeseries[timeseries['object_id'] == obj_id]
    obj_meta = metadata[metadata['object_id'] == obj_id]
    #applying naive benchmark that supernovae are extragalactic
    if (obj_meta['hostgal_photoz']).mean() == 0:
        print('This object is not extragalactic!')
        return res

    #number of synthetic baseline points added on each side of a band
    offset = 11
    t0s = []
    trises = []
    #per-band inputs for stage two, kept together with their band number so
    #that a skipped band cannot shift results into the wrong columns
    stage_two = []

    #stage one: independent four-parameter fit within each band
    for band in bands:
        #the gap computations below need the observations in time order
        single_ts = obj_ts[obj_ts['passband'] == band].sort_values('mjd')
        if single_ts.empty:
            continue
        mjd = single_ts['mjd']
        flux = single_ts['flux'].values
        flux_err = single_ts['flux_err'].values

        #gap to the previous / next observation; 100 stands in for "no neighbour".
        #both arrays are padded like x and y so they can be indexed with idxmax
        pad_gap = 100 * np.ones(offset)
        mjd_delta_prev = np.concatenate((pad_gap, (mjd - mjd.shift(1)).fillna(100).values.ravel(), pad_gap))
        mjd_delta_next = np.concatenate((pad_gap, (mjd.shift(-1) - mjd).fillna(100).values.ravel(), pad_gap))

        #taking the time range and average flux error of data
        x_min = mjd.min()
        x_max = mjd.max()
        y_err_mean = flux_err.mean()

        #pad both ends with evenly spaced samples 450-500 days away from the
        #data whose flux is random noise scaled by the mean flux error
        x = np.concatenate((np.linspace(x_min - 500, x_min - 450, offset), mjd.values, np.linspace(x_max + 450, x_max + 500, offset)))
        y = np.concatenate((np.random.randn(offset) * y_err_mean, flux, np.random.randn(offset) * y_err_mean))
        y_err = np.concatenate((y_err_mean * np.ones(offset), flux_err, y_err_mean * np.ones(offset)))

        idxmax = np.argmax(y) #y index of maximum flux
        t00 = x[idxmax] #time of maximum flux
        A = y[idxmax] #A = value of maximum flux
        #the amplitude bounds below are multiples of A, so they are only
        #ordered correctly for a positive peak; such a band cannot be fitted
        if not A > 0:
            continue

        #possible bounds for the constant A
        Amin = A * 1.3
        Amax = 4 * A
        #some sample values for parameters that must be tuned
        tstart = -5
        trise = 5
        tfall = 10

        #widen the guesses when the peak is more than 50 days away from
        #its neighbouring observation
        if mjd_delta_prev[idxmax] > 50:
            tstart = -50
            Amin = 2 * A
            Amax = 5 * A
            trise = 20
            tfall = 40
        if mjd_delta_next[idxmax] > 50:
            Amin = 2 * A
            Amax = 5 * A
            trise = 20
            tfall = 40

        #initial guess and bounds in the order (A, t0, trise, tfall)
        p0 = [(Amin + Amax) / 2, t00 + tstart, trise, tfall]
        bmin = [Amin, t00 + tstart - 100, trise / 10, tfall / 10]
        bmax = [Amax, t00 + tstart, trise * 10, tfall * 10]

        try:
            params, params_covariance = curve_fit(bazin, x, y, p0, y_err, bounds=(bmin, bmax), max_nfev=1000)
        except (RuntimeError, ValueError):
            #curve_fit reports a failed or invalid fit with these two
            #exception types; skip the band and keep going
            continue

        median_cov = np.abs(np.median(params_covariance / A))

        #only well-constrained fits in bands 0-4 vote for the shared t0
        if (median_cov <= 10) and (band in range(0, 5)):
            t0s.append(params[1])
        trises.append(params[2])
        stage_two.append({
            'band': band,
            'A': A,
            'p0': [params[0], params[3]],
            'bounds': ([bmin[0], bmin[3]], [bmax[0], bmax[3]]),
            'x': x,
            'y': y,
            'y_err': y_err,
        })

    #fewer than two usable t0 estimates: the fit has failed and the
    #parameters are left as NaN
    if len(t0s) <= 1:
        print('The fit has failed!')
        return res
    t00 = np.median(t0s)
    trise = np.median(trises)
    res['bazin_trise'] = trise

    #stage two: t0 and trise are shared across bands, only A and tfall vary
    def bazin_shared(t, A, tfall):
        return bazin(t, A, t00, trise, tfall)

    for fit in stage_two:
        try:
            params, params_covariance = curve_fit(bazin_shared, fit['x'], fit['y'], fit['p0'], fit['y_err'], bounds=fit['bounds'], max_nfev=1000)
        except (RuntimeError, ValueError):
            continue
        #same quality gate as stage one, scaled by this band's own peak flux
        median_cov = np.abs(np.median(params_covariance / fit['A']))

        if median_cov <= 10:
            res['bazin_tfall_{}'.format(fit['band'])] = params[1]
            res['bazin_A_{}'.format(fit['band'])] = params[0]
    return res