In [1]:
import os
import glob
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from time import sleep

In [8]:
band = ["g'", "r'"]
filters = ['g', 'r', "g'", "r'"]

if all(elem in band for elem in filters):
    print('ha')

In [13]:
os.chdir('/home/ricky/RNNAE/OSC_json')
print(os.getcwd())

/home/ricky/RNNAE/OSC_json


In [14]:
class QC:

    def __init__(self, filename, filters, json_data_temp):

        self.SN_name = filename.replace('.json', '')
        self.SN_name = self.SN_name.replace('_', ':')
        
        self.filters = filters
        self.json_data_temp = json_data_temp

        self.Band = []
        self.filename_QC = False

        self.t = []
        self.m_app = []

        for ii in range(len(self.filters)):
            self.t.append([])
            self.m_app.append([])

    def avoid_NON_SNIa(self):
        
        try:
            SN_type = self.json_data_temp[self.SN_name]['claimedtype'][0]['value']
        except:
            SN_type = ''

        if 'Ia' in SN_type:
            self.filename_QC = True
        else:
            self.filename_QC = False

        return self.filename_QC
    
    def avoid_empty_SN(self, num=40, lc_length_prepeak=-50, lc_length_postpeak=135):
        
        try:
            N = len(self.json_data_temp[self.SN_name]['photometry'])
        except:
            N = 1

        for i in range(N):

            try:
                self.Band.append(self.json_data_temp[self.SN_name]['photometry'][i]['band'])
            except:
                self.Band.append(0)
        

        a = 0 # Number of data points at the specified bands

        if all(elem in self.Band for elem in self.filters):

            self.filename_QC = True

            for ii in range(N):
                for jj, filter in enumerate(self.filters):
                    if self.Band[ii] == filter:

                        self.m_app[jj].append(float(self.json_data_temp[self.SN_name]['photometry'][ii]['magnitude']))

                        self.t[jj].append(float(self.json_data_temp[self.SN_name]['photometry'][ii]['time']))

            Magnitude_Max_id = np.argmin(self.m_app[0])
            Time_Max = self.t[0][Magnitude_Max_id]

            for ii in range(len(self.filters)):
                Time_Duration = [jj for jj in self.t[ii] if (jj > (Time_Max + lc_length_prepeak)) and (jj < (Time_Max + lc_length_postpeak))]
                a += len(Time_Duration)
        
        if a > num:
            self.filename_QC = True
        else:
            self.filename_QC = False

        try:
            LumDist = float(self.json_data_temp[self.SN_name]['lumdist'][0]['value'])
            z = float(self.json_data_temp[self.SN_name]['redshift'][0]['value'])
        except:
            self.filename_QC = False

        return self.filename_QC
        

In [15]:
class LC_Preprocess:

    def __init__(self, filename, filters, json_data_temp):

        self.filename = filename

        self.SN_name = self.filename.replace('.json', '')
        self.SN_name = self.SN_name.replace('_', ':')
        
        self.filters = filters
        self.json_data_temp = json_data_temp

        self.Band = []
        self.Type = 0

        self.Time = []
        self.Magnitude_Abs = []
        self.Magnitude_Abs_err = []

        for ii in range(len(self.filters)):
            self.Time.append([])
            self.Magnitude_Abs.append([])
            self.Magnitude_Abs_err.append([])

    def peak_alignment(self, lc_length_prepeak=-50, lc_length_postpeak=135):

        Band_Max = None
        print(self.Magnitude_Abs)

        if len(self.Magnitude_Abs[10]) != 0: # Choosing g band as default
            Band_Max = 10
        else:
            for ii, filter in enumerate(self.filters):
                if len(self.Magnitude_Abs[ii]) != 0:
                    Band_Max = ii
                    break
        
        Magnitude_Max_id = np.argmin(self.Magnitude_Abs[Band_Max])
        Time_Max = self.Time[Band_Max][Magnitude_Max_id]

        for ii, filter in enumerate(self.filters):

            self.Time[ii] = np.array(self.Time[ii]) - Time_Max

            self.Time[ii]              = np.delete(self.Time[ii], np.where(self.Time[ii] > lc_length_postpeak))
            self.Magnitude_Abs[ii]     = self.Magnitude_Abs[ii][0:len(self.Time[ii])]
            self.Magnitude_Abs_err[ii] = self.Magnitude_Abs_err[ii][0:len(self.Time[ii])]

            self.Time[ii]              = np.delete(self.Time[ii], np.where(self.Time[ii] < lc_length_prepeak))
            self.Magnitude_Abs[ii]     = self.Magnitude_Abs[ii][len(self.Magnitude_Abs[ii]) - len(self.Time[ii]):]
            self.Magnitude_Abs_err[ii] = self.Magnitude_Abs_err[ii][len(self.Magnitude_Abs_err[ii]) - len(self.Time[ii]):]

            if (len(self.Time[ii]) - len(self.Magnitude_Abs[ii])) != 0:
                print('bruh')

        return self.Time, self.Magnitude_Abs, self.Magnitude_Abs_err, self.Type

    def LC_graph(self):

        colors = ['darkviolet', 'royalblue', 'seagreen', 'crimson', 'maroon', 'darkmagenta', 'darkgreen', 'firebrick', 'darkred', 'indigo', 'darkseagreen', 'indianred', 'sienna']

        plt.plot(figsize=(16,12))

        for ii, filter in enumerate(self.filters):
            plt.errorbar(self.Time[ii], self.Magnitude_Abs[ii], self.Magnitude_Abs_err[ii], label=filter, color=colors[ii], fmt='.')
        
        plt.title('{}, {}'.format(self.SN_name, self.Type))
        plt.xlim(-50, 135)
        plt.xlabel('time (day)')
        plt.ylabel('absolute magnitude')
        plt.legend()
        plt.grid()
        plt.gca().invert_yaxis()
        plt.savefig('/home/ricky/RNNAE/import_graph/{}.pdf'.format(self.SN_name))
        plt.show()

    def LC_extractor(self, **kwargs):

        LumDist = float(self.json_data_temp[self.SN_name]['lumdist'][0]['value'])

        try:
            LumDist_err = float(self.json_data_temp[self.SN_name]['lumdist'][0]['e_value'])
        except:
            LumDist_err = 0

        z = float(self.json_data_temp[self.SN_name]['redshift'][0]['value'])

        N = len(self.json_data_temp[self.SN_name]['photometry'])

        self.Type = self.json_data_temp[self.SN_name]['claimedtype'][0]['value']

        for ii in range(N):

            try:
                self.Band.append(self.json_data_temp[self.SN_name]['photometry'][ii]['band'])
            except:
                self.Band.append(0)

            for jj, filter in enumerate(self.filters):

                if self.Band[ii] == filter:

                    self.Magnitude_App = float(self.json_data_temp[self.SN_name]['photometry'][ii]['magnitude'])

                    self.Time[jj].append(float(self.json_data_temp[self.SN_name]['photometry'][ii]['time']))
                    
                    self.Magnitude_Abs[jj].append(self.Magnitude_App - 5*np.log10(LumDist*1e5) + 2.5*np.log10(1+z))

                    try:
                        Magnitude_App_err = float(self.json_data_temp[self.SN_name]['photometry'][jj]['e_magnitude'])
                        self.Magnitude_Abs_err[jj].append(np.sqrt(Magnitude_App_err**2 + (5*0.434*LumDist_err/LumDist)**2))
                    except:
                        self.Magnitude_Abs_err[jj].append(0.3)


        if kwargs['peak_alignment'] == True:
            LC_Preprocess.peak_alignment(self)
        
        if kwargs['LC_graph'] == True:
            LC_Preprocess.LC_graph(self)
        
        return self.Time, self.Magnitude_Abs, self.Magnitude_Abs_err, self.Type

In [16]:
filenames = glob.glob('*.json')
np.random.seed(1)
np.random.shuffle(filenames)
print(len(filenames))

# Create a list for all .json, the 1st SN saved as json_data[0], the 2nd SN saved as json_data[1], etc.
#json_data = []
Time = []
Magnitude_Abs = []
Magnitude_Abs_err = []
Type = []
filter_all = ['U', 'B', 'V', 'R', 'I', "u'", "g'", "r'", "i'", 'u', 'g', 'r', 'i'] #13
jj = 0

for ii, filename in tqdm(enumerate(filenames)):
    with open(filename, encoding="utf-8") as f:
        json_data_temp = json.load(f)
        sleep(0)

        filename_QC1 = QC(filename, filter_all, json_data_temp).avoid_NON_SNIa()
        filename_QC2 = QC(filename, filter_all, json_data_temp).avoid_empty_SN()

        if (filename_QC1 and filename_QC2) == True:
            LC_result = LC_Preprocess(filename, filter_all, json_data_temp).LC_extractor(peak_alignment = True, LC_graph = False)
            #LC_result = LC_Preprocess(filename, filter_all, json_data_temp).peak_alignment(-50, 135)
            Time.append(LC_result[0])
            Magnitude_Abs.append(LC_result[1])
            Magnitude_Abs_err.append(LC_result[2])
            Type.append(LC_result[3])
            jj += 1

print(jj)

94174


0it [00:00, ?it/s]

[[-17.871705900981013, -16.815405900981013, -16.841505900981016, -16.102305900981015, -16.194105900981015, -16.186905900981014], [-18.338305900981013, -18.391305900981013, -18.415305900981014, -18.424305900981015, -18.382305900981013, -18.344305900981013, -18.423705900981012, -18.213305900981013, -17.882305900981013, -17.805305900981015, -17.693305900981013, -17.563305900981014, -17.472305900981013, -17.454605900981015, -17.426305900981014, -17.285305900981015, -17.243305900981014, -17.108305900981016, -17.242405900981016, -16.962305900981015, -16.947505900981014, -16.863205900981015, -16.722305900981013, -16.700305900981014, -16.619305900981015, -16.243005900981014, -16.385305900981013, -16.028305900981014, -16.111305900981016, -15.991305900981013, -15.898905900981012, -15.929305900981012, -15.720305900981012, -15.904305900981013, -15.827305900981012, -15.921405900981012, -15.618305900981012, -15.52730590098101, -15.343805900981012, -15.107505900981012, -15.598305900981012, -15.445805