This notebook implements all the necessary processing to obtain the spectra that will be used in my Doctorate degree.

In [1]:
import os
import re
import requests
import zipfile
import json 
import datetime
import shutil

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from scipy import interpolate

from astropy.time import Time

# WISeREP

The main webpage is: [WISeREP](https://www.wiserep.org/)

## Download data

In [None]:
# Host and endpoint of the WISeREP spectra search.
WISeREP                = "www.wiserep.org"
url_wis_spectra_search = "https://" + WISeREP + "/search/spectra"

# Personal API key (*** MUST BE PROVIDED ***).
# It is read from the WISEREP_API_KEY environment variable so the secret is
# never stored in the notebook or committed to version control.
# NOTE: any key that was previously written in this cell should be regenerated.
personal_api_key       = os.environ.get("WISEREP_API_KEY", "")
if not personal_api_key:
    print("WARNING: WISEREP_API_KEY is not set; WISeREP requests will be sent without an API key.")
# for User-Agent:
WIS_USER_NAME          = "Steve Jurado"
WIS_USER_ID            = "Steve Jurado"

In [None]:
# Object-type ids requested from WISeREP, with their human-readable labels.
# Only the keys are sent in the query; the labels are for the reader.
supernovae_types = {
    1: 'SN', 2: 'SN I', 3: 'SN Ia', 4: 'SN Ib', 5: 'SN Ic', 6: 'SN Ib/c',
    7: 'SN Ic-BL', 9: 'SN Ibn', 10: 'SN II', 11: 'SN IIP', 12: 'SN IIL', 13: 'SN IIn', 14: 'SN IIb', 15: 'SN I-faint', 16: 'SN I-rapid',
    18: 'SLSN-I', 19: 'SLSN-II', 20: 'SLSN-R',
    100: 'SN Ia-pec', 102: 'SN Ia-SC', 103: 'SN Ia-91bg-like', 104: 'SN Ia-91T-like', 105: 'SN Iax[02cx-like]', 106: 'SN Ia-CSM',
    107: 'SN Ib-pec', 108: 'SN Ic-pec', 109: 'SN Icn', 110: 'SN Ibn/Icn',
    111: 'SN II-pec', 112: 'SN IIn-pec', 115: 'SN Ib-Ca-rich', 116: 'SN Ib/c-Ca-rich', 117: 'SN Ic-Ca-rich', 118: 'SN Ia-Ca-rich'
}

# spectypes = {10: 'Object', 50: 'Synthetic'}

# Search filters: public spectra only, the object types above, spectype 10 ('Object').
query_params    = "&public=yes&type[]="+"["+",".join(str(x) for x in supernovae_types.keys())+']'+"&spectypes[]=10"
# Download options: 250 results per page, ASCII spectra, metadata as HTML or CSV.
download_params_html = "&num_page=250&format=html&files_type=ascii"
download_params_csv = "&num_page=250&format=csv&files_type=ascii"
download_params = download_params_csv

parameters_csv = "?" + query_params+download_params_csv + "&personal_api_key=" + personal_api_key
parameters_html = "?" + query_params+download_params_html + "&personal_api_key=" + personal_api_key

# url of wiserep spectra search (with parameters)
URL      = url_wis_spectra_search + parameters_csv
URL_html = url_wis_spectra_search + parameters_html

# Show the URL with the API key masked so the secret does not end up in the
# saved cell output. (str.replace with an empty key would corrupt the string.)
URL_html_redacted = URL_html.replace(personal_api_key, '<personal_api_key>') if personal_api_key else URL_html
print('The URL of WISeREP spectra search is:\n',URL_html_redacted)

In [None]:
def is_string_json(string):
    """Decode `string` as JSON.

    Returns the decoded object when `string` is valid JSON, otherwise False.
    Any exception raised by `json.loads` (including a non-string argument)
    is treated as "not JSON".
    """
    try:
        return json.loads(string)
    except Exception:
        return False

In [None]:
def response_status(response):
    """Build a printable "[ code - 'message' ]" status string for a response.

    Parameters
    ----------
    response : object exposing `.text` and `.status_code`
        (e.g. a `requests.Response`).

    Returns
    -------
    str
        If the body is a JSON object carrying `id_code` and `id_message`,
        those two fields are reported. Otherwise the HTTP status code is
        reported with 'OK', one of the documented error texts, or
        'Undocumented error'.
    """
    # HTTP errors documented for the service, with their messages
    documented_errors = {
        403: "Forbidden",
        500: "Internal Server Error: Something is broken",
        503: "Service Unavailable",
    }

    # The body is parsed here (rather than via a helper) so that the JSON
    # branch and the HTTP branch are clearly exclusive: previously the HTTP
    # branch also ran after the JSON branch and failed on an undefined
    # `status_code`.
    try:
        payload = json.loads(response.text)
    except Exception:
        payload = None

    if isinstance(payload, dict) and 'id_code' in payload and 'id_message' in payload:
        return "[ " + str(payload['id_code']) + " - '" + str(payload['id_message']) + "' ]"

    status_code = response.status_code
    if status_code == 200:
        status_msg = 'OK'
    else:
        status_msg = documented_errors.get(status_code, 'Undocumented error')
    return "[ " + str(status_code) + " - '" + status_msg + "' ]"

def print_response(response, page_num):
    """Print the page number together with the status string of `response`."""
    print('Page number ' + str(page_num) + ' | return code: ' + response_status(response))

In [None]:
#------------------------------------------------------------------------
# current date (YYYYMMDD)
current_datetime = datetime.datetime.now()
current_date_time = current_datetime.strftime("%Y%m%d")

# current working directory
cwd = os.getcwd()

# folder that receives the downloaded spectra
current_download_folder = os.path.join(cwd, "wiserep_spectra_data")
os.makedirs(current_download_folder, exist_ok=True)

# User-Agent marker sent with every request; json.dumps produces the same
# text as manual concatenation for plain names and also escapes quotes.
wis_marker = 'wis_marker' + json.dumps({"wis_id": str(WIS_USER_ID), "type": "user", "name": WIS_USER_NAME})
headers = {'User-Agent': wis_marker}

# metadata file extension, derived from the requested download format
if "format=tsv" in download_params:
    extension = ".tsv"
elif "format=csv" in download_params:
    extension = ".csv"
elif "format=json" in download_params:
    extension = ".json"
else:
    extension = ".txt"

# accumulated metadata rows and the file they are written to.
# The name must match the file read back later in this notebook
# ('./wiserep_spectra_metadata.csv'); it was previously misspelled 'wisrep_'.
META_DATA_LIST = []
META_DATA_FILE = os.path.join(cwd, "wiserep_spectra_metadata" + extension)

In [None]:
# page counter and upper bound on the number of pages requested
page_num = 0
page_num_max = 185

# go through every page
while page_num < page_num_max:

    # url for download
    url = URL + "&page=" + str(page_num)

    # send request
    response = requests.post(url, headers=headers, stream=True)

    # 404 means there are no more pages: stop silently.
    # (Previously the page counter was not advanced in this case, so the
    # same page was requested forever.)
    if response.status_code == 404:
        break

    # any other failure: report it and move on to the next page
    if response.status_code != 200:
        print_response(response, page_num + 1)
        page_num += 1
        continue

    # print response
    print_response(response, page_num + 1)

    # download the zip archive of this page
    file_name = 'wiserep_spectra.zip'
    file_path = os.path.join(current_download_folder, file_name)
    with open(file_path, 'wb') as f:
        for data in response.iter_content(chunk_size=8192):
            f.write(data)

    # unzip it next to the other spectra, then drop the archive
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        zip_ref.extractall(current_download_folder)
    os.remove(file_path)

    # locate the per-page metadata file that came inside the archive
    downloaded_files = os.listdir(current_download_folder)
    meta_data_file = os.path.join(current_download_folder, [e for e in downloaded_files if 'wiserep_spectra' in e][0])
    with open(meta_data_file, 'r') as f:
        meta_data_list = f.read().splitlines()

    # keep the header row only once: from the first page that succeeded
    if not META_DATA_LIST:
        META_DATA_LIST = META_DATA_LIST + meta_data_list
    else:
        META_DATA_LIST = META_DATA_LIST + meta_data_list[1:]

    # increase page number
    page_num = page_num + 1
    # remove the per-page metadata file
    os.remove(meta_data_file)

# write the merged metadata (no trailing newline)
if META_DATA_LIST != []:
    with open(META_DATA_FILE, 'w') as f:
        f.write('\n'.join(META_DATA_LIST))
    print()
    print("Wiserep data was successfully downloaded.")
    print("Folder " + current_download_folder + " containing the data was created.")
    print("File " + META_DATA_FILE + " was created.")
else:
    print("There is no WISeREP data for the given parameters.")
    shutil.rmtree(current_download_folder)

In [None]:
# Read one downloaded spectrum as a quick sanity check.
fidx = 0
spectra_folder = 'wiserep_spectra_data'  # folder created by the download step
# sorted() makes `fidx` refer to the same file on every run
file_name = sorted(os.listdir(path=spectra_folder))[fidx]

columns = ['lambda', 'flux_lambda']

# header=None keeps the first data row (it used to be consumed as a header);
# only the first two columns are read, and '#' comment lines are ignored.
data = pd.read_csv(filepath_or_buffer=os.path.join(spectra_folder, file_name),
                   sep=r'\s+', header=None, comment='#', usecols=[0, 1], names=columns)
data.head()

In [None]:
# Line plot of the sample spectrum on an explicit Axes
fig, ax = plt.subplots()
data.plot(x='lambda', y='flux_lambda', kind='line', ax=ax)
ax.set_xlabel('lambda')
ax.set_ylabel('flux')

The total number of spectra obtained from WISeREP is:

In [None]:
# Count the entries of the download folder
n_spectra = len(os.listdir(path='./wiserep_spectra_data'))
print('The total amount of spectra:', n_spectra)

## Unify

### Preliminaries

In [None]:
# Create the directory where the processed spectra will be saved
# (no error if it already exists)
os.makedirs(name='data/wiserep_spectra',exist_ok=True)

In [None]:
# Pick one downloaded file and show how its name splits on '_'
fidx = 1
file_name = os.listdir(path='wiserep_spectra_data')[fidx]
file_name.split(sep='_')

In [None]:
# Preview: append the instrument part of the file name to every line of one spectrum.
# The folder is the one created by the download step (it used to point to
# './wiserep_data_/', which this notebook never creates).
PATH_input = "./wiserep_spectra_data/"
file = 'SN2007af_2007-04-10_09-30-14_Lick-3m_KAST_UCB-SNDB.flm'
# name fields after <object>_<date>_<time>
file_split = file.split(sep='_')[3:]
with open(PATH_input+file, 'r') as infile:
    for line in infile:
        new_line = line.strip() + "\t"+"".join(file_split)
        print(new_line)

### Functions

In [None]:
# Folder with the spectra files written by the download step
PATH_INPUT  = './wiserep_spectra_data/'
# Folder for the merged per-object tables (created in the Preliminaries section)
PATH_OUTPUT = './data/wiserep_spectra/'

In [None]:
def dictionary_types_files(path_input: str) -> dict:

    """
    Classify the spectra files of a folder by the naming scheme of their object.

    The object name is the part of the file name before the first '_'.

    Parameters:
    -----------
    path_input -> str: folder that contains the spectra files.

    Returns:
    -----------
    SN_dict -> dict: sorted, de-duplicated name lists under these keys
        'SN'       : files named 'SN<digit>...' (stored without the 'SN') and
                     files that do not start with 'SN' but have digits at
                     positions 1 and 3 (e.g. '1987A')
        'SNLS'     : files starting with 'SNLS'
        'Other_SN' : files starting with 'SN' (not 'SNLS') not followed by a digit
        'Other'    : files not starting with 'SN' whose 2nd character is not a digit
        'Weird'    : remaining files whose 4th character is not a digit; the
                     name is the first two '_' fields joined by '_'
    """

    sn_names = set()
    snls_names = set()
    other_sn_names = set()
    other_names = set()
    weird_names = set()

    for file in os.listdir(path=path_input):
        fields = file.split('_')
        name = fields[0]

        # Slices (file[i:i+1]) are used instead of file[i] so that very short
        # file names are classified instead of raising IndexError.
        if file.startswith('SNLS'):
            snls_names.add(name)
        elif file.startswith('SN'):
            if file[2:3].isdigit():
                sn_names.add(name.replace('SN', ''))
            else:
                other_sn_names.add(name)
        elif not file[1:2].isdigit():
            other_names.add(name)
        elif not file[3:4].isdigit():
            weird_names.add('_'.join(fields[:2]))
        else:
            sn_names.add(name)

    # Sets remove duplicates (the previous `list | list` union raised
    # TypeError); sorting gives a reproducible processing order.
    SN_dict = {'SN': sorted(sn_names),
               'SNLS': sorted(snls_names),
               'Other_SN': sorted(other_sn_names),
               'Other': sorted(other_names),
               'Weird': sorted(weird_names)}

    return SN_dict

In [None]:
# Classify every file of the input folder by the naming scheme of its object
SN_dict = dictionary_types_files(path_input=PATH_INPUT)

In [None]:
def files2raw(sn_name: str, path_input:str, path_output:str, key_value: str =None) -> None:

    """Join all the spectra files of one object into a single raw text file.

    Files are expected to be named '<object>_<date>_<time>_<instrument...>'.
    Every line that starts with a digit is copied and two tab-separated fields
    are appended: the instrument part of the file name (fields 3.. joined by
    '_') and the timestamp '<date>T<time>' with '-' replaced by ':' in the time.

    Parameters:
    ------------
    sn_name -> str: object name; for key_value 'SN' it is given without the
        'SN' prefix.
    path_input -> str: folder with the spectra files (must end with a separator).
    path_output -> str: folder for the raw file (must end with a separator).
    key_value -> str: category of the object ('SN', 'SNLS', 'Other_SN',
        'Other' or 'Weird'). None and 'Weird' are not processed.

    Returns:
    ------------
    None. Writes '<path_output>SN<sn_name>_raw.dat' for key_value 'SN' and
    '<path_output><sn_name>_raw.dat' otherwise.
    """

    if key_value is None:
        return print('Error you need a key_value !!')

    if key_value == 'Weird':
        return print('This is a weird file')

    prefix = 'SN' if key_value == 'SN' else ''

    # Select files by comparing the whole object field of the file name
    # (case-insensitively). A substring regex search would also pick up other
    # objects, e.g. '2007A' matching 'SN2007af', and would misread names that
    # contain regex metacharacters such as '+'.
    accepted_names = {sn_name.lower(), (prefix + sn_name).lower()}
    matching_files = sorted(file for file in os.listdir(path=path_input)
                            if file.split('_')[0].lower() in accepted_names)

    with open(path_output + f'{prefix}{sn_name}_raw.dat', 'w') as f:
        for file in matching_files:
            fields = file.split(sep='_')
            date = fields[1]
            hour = fields[2].replace('-', ':')
            instrument = "_".join(fields[3:])
            tag = "\t" + instrument + "\t" + "T".join([date, hour])
            # errors='replace': a stray non-decodable byte (e.g. in a header)
            # must not discard the whole object
            with open(path_input + file, 'r', errors='replace') as infile:
                for line in infile:
                    stripped = line.strip()
                    # keep data rows only: skip blanks, comments and text headers
                    if stripped and stripped[0].isdigit():
                        f.write(stripped + tag + '\n')

In [None]:
# List the merge outputs: leftover '*_raw.dat' intermediates vs. finished tables.
# PATH_OUTPUT is the folder the merge functions write to ('./data/spectra/'
# is never created by this notebook).
merged_folder_files = os.listdir(path=PATH_OUTPUT)
file_raw_list = [file for file in merged_folder_files if '_raw' in file]
file_data_list = [file for file in merged_folder_files if '_raw' not in file]
file_raw_list_name = [file_name.split('_')[0] for file_name in file_raw_list]

len(file_raw_list)

In [None]:
def file2dataframe(sn_name:str, path_input:str, path_output:str, key_value:str=None) -> None:

    """Convert the '<name>_raw.dat' file of one object into a CSV table.

    Each raw row is split on whitespace and mapped to the columns
    lambda, flux_lambda, e_flux_lambda, instrument, date:
      * 4 fields  -> no flux error in the source; NaN is inserted;
      * 5+ fields -> the first three and the last two fields are kept
                     (extra numeric columns are dropped);
      * fewer     -> the row is skipped.
    The date is converted to MJD and the table is saved with the columns
    mjd, lambda, flux_lambda, e_flux_lambda, instrument. The raw file is
    deleted afterwards.

    Parameters
    ----------
    sn_name : object name (without the 'SN' prefix when key_value is 'SN').
    path_input : unused; kept for call compatibility.
    path_output : folder holding the raw file and receiving the table
        (must end with a separator).
    key_value : category of the object; None and 'Weird' are not processed.
    """

    if key_value is None:
        return print('Error u need a key_value !!')

    if key_value == 'Weird':
        return print('This is a weird file')

    prefix = 'SN' if key_value == 'SN' else ''
    raw_path = f"{path_output}{prefix}{sn_name}_raw.dat"
    table_path = f"{path_output}{prefix}{sn_name}.dat"

    data = []
    with open(raw_path, 'r') as file:
        for line in file:
            columns = re.split(r'\s+', line.strip())
            if len(columns) < 4:
                # not enough fields for lambda, flux, instrument and date
                continue
            if len(columns) == 4:
                # lambda, flux, instrument, date -> add the missing error column
                columns.insert(2, np.nan)
            else:
                # instrument and date are always the last two fields
                columns = columns[:3] + columns[-2:]
            data.append(columns)

    headers = ['lambda', 'flux_lambda', 'e_flux_lambda', 'instrument', 'date']
    df = pd.DataFrame(data, columns=headers)
    df['date'] = df['date'].astype(str)
    # Every row of one spectrum shares its timestamp: convert each distinct
    # date once instead of once per row.
    mjd_by_date = {date: Time(date).mjd for date in df['date'].unique()}
    df['mjd'] = df['date'].map(mjd_by_date)

    df = df.drop('date', axis=1)
    df = df.reindex(columns=['mjd','lambda', 'flux_lambda', 'e_flux_lambda', 'instrument'])
    df.to_csv(table_path, index=False, header=True, na_rep='nan')
    os.remove(raw_path)

In [None]:
def running_merge_spectra(sn_name: str, path_input: str, path_output: str, key_value: str =None):
    """Run the two merge steps (files2raw, then file2dataframe) for one object.

    Parameters
    ----------
    sn_name : object name; for key_value 'SN' the 'SN' text is removed.
    path_input : folder with the downloaded spectra files.
    path_output : folder for the merged table.
    key_value : category of the object; None and 'Weird' are not processed.

    Returns
    -------
    None. Nothing is done when the output table already exists. A failure in
    either step is reported with its cause and does not stop the caller.
    """
    is_sn = key_value == 'SN'
    if is_sn and sn_name.startswith('SN'):
        sn_name = sn_name.replace('SN', '')

    if key_value is None:
        return print('Error u need a key_value !!')

    if key_value == 'Weird':
        return print('This is a weird file')

    file_prefix = 'SN' if is_sn else ''
    label = 'Supernova SN' if is_sn else 'Supernova '

    if os.path.exists(os.path.join(path_output, f'{file_prefix}{sn_name}.dat')):
        return print('File already exist')

    # `except Exception` (not a bare except) so Ctrl-C still interrupts a long run.
    try:
        files2raw(sn_name=sn_name, path_input=path_input, path_output=path_output, key_value=key_value)
    except Exception as exc:
        print(label, sn_name, '| Error in raw:', repr(exc))
        # Do not build a table from a partially written raw file
        raw_path = os.path.join(path_output, f'{file_prefix}{sn_name}_raw.dat')
        if os.path.exists(raw_path):
            os.remove(raw_path)
        return None

    try:
        file2dataframe(sn_name=sn_name, path_input=path_output, path_output=path_output, key_value=key_value)
    except Exception as exc:
        print(label, sn_name, '| Error in dataframe:', repr(exc))

In [None]:
# Category names, in the order in which they are indexed below
SN_dict_keys = list(SN_dict.keys())
SN_dict_keys

In [None]:
# A bare 'SN' is not an object name: drop it if present. The membership
# check keeps this cell safe to re-run.
if 'SN' in SN_dict['Other_SN']:
    SN_dict['Other_SN'].remove('SN')
SN_dict['Other_SN']

In [None]:
# Processing log (index into SN_dict_keys):
#   0 -> SN       : already done
#   1 -> SNLS     : already done
#   2 -> Other_SN : already done
#   3 -> Other    : already done
#   4 -> Weird    : not used

key_value = SN_dict_keys[2]
names_to_process = SN_dict[key_value]
total = len(names_to_process)
count = 0
while count < total:
    sn_name = names_to_process[count]
    # progress report every 10 objects
    if count % 10 == 0:
        print(f'{count}/{total}')
    running_merge_spectra(sn_name=sn_name, path_input=PATH_INPUT, path_output=PATH_OUTPUT,key_value=str(key_value))
    count += 1

print('\nEnd...')

## Plotting

In [None]:
import os
import random
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Finished tables written by the merge step; '*_raw' intermediates are skipped.
# The folder is the one created in the Preliminaries section
# ('./data/spectra/' is never created by this notebook).
all_files = os.listdir(path='./data/wiserep_spectra/')
all_files = [file for file in all_files if "_raw" not in file]

In [None]:
# Object to inspect. To look at a random one instead, use
# random.choice(all_files); the random pick that used to precede this line
# was always overwritten by the fixed name.
sn_name = 'SN2017awk.dat'
data = pd.read_csv('./data/wiserep_spectra/'+sn_name)

def plotobject(df,sn_name, inst_name):
    """Plot flux against wavelength for `df`, one labelled curve per MJD.

    `sn_name` and `inst_name` are shown in the title, each cut at its first '.'.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    for mjd_date, group_df in df.groupby('mjd'):
        curve_label = f'{mjd_date:.2f} MJD'
        ax.plot(group_df['lambda'], group_df['flux_lambda'], label=curve_label)
    ax.legend(frameon=False)
    supernova_tag = sn_name.split(".")[0]
    instrument_tag = inst_name.split(".")[0]
    ax.set_xlabel('Lambda')
    ax.set_ylabel('Flux_lambda')
    ax.set_title('Supernova:'+supernova_tag+" | Instrument: "+instrument_tag)
  

# One figure per instrument, each showing all of its epochs
data_instrument = data.groupby('instrument')
for inst_name, group in data_instrument:
    plotobject(group, sn_name=sn_name, inst_name=inst_name)

In [None]:
def plotobject(df):
    """Overlay every epoch of `df` on the shared axes `ax` defined in this cell.

    NOTE: this redefines the three-argument `plotobject` used in the previous
    cell; re-run that cell's definition before calling it again.
    """
    # The table columns are 'mjd' and 'flux_lambda' (there is no 'date' or 'flux')
    for _, epoch in df.groupby("mjd"):
        ax.plot(epoch['lambda'], epoch['flux_lambda'], alpha=0.3)

fig, ax = plt.subplots(figsize=(24, 6))
for _, instrument_group in data.groupby('instrument'):
    plotobject(instrument_group)

## Master DataSet

In [None]:
# From here on PATH_INPUT points to the merged per-object tables.
# This is the folder written by the merge step; './data/spectra_wiserep/'
# is never created by this notebook.
# NOTE(review): this reassigns the PATH_INPUT used by the merge cells above.
PATH_INPUT = './data/wiserep_spectra/'

# sorted(): later cells slice this list by index, so its order must be stable
all_files = sorted(file for file in os.listdir(path=PATH_INPUT) if '_raw' not in file)
print('The total amount of Supernovae ready to process is:',len(all_files))

In [None]:
# Load the metadata table; low_memory=False reads the file in one pass
# instead of in chunks
supernovae_metadata = pd.read_csv('./wiserep_spectra_metadata.csv',low_memory=False)
supernovae_metadata.shape

In [None]:
# Column names available in the metadata table
supernovae_metadata.columns

I am using the wavelength ranges of the LSST filters (u, g, r, i, z, y).


Then, we define the wavelength grid as follows:

In [None]:
# Wavelength range [min, max] of each LSST band
wavelength_lsst = {
    'u': [3206.34, 4081.51],
    'g': [3876.02, 5665.33],
    'r': [5377.19, 7055.16],
    'i': [6765.77, 8325.05],
    'z': [8035.39, 9375.47],
    'y': [9089.07, 10915.01]
}

# Flatten all band edges once; the grid spans from the smallest to the largest
_band_edges = [edge for band_range in wavelength_lsst.values() for edge in band_range]
wavelength_grid_min = min(_band_edges)
wavelength_grid_max = max(_band_edges)

print(f'The min wavelength value to create the grid is: {wavelength_grid_min:.2f} Angstrom')
print(f'The max wavelength value to create the grid is: {wavelength_grid_max:.2f} Angstrom')

# Number of grid points
nwavelength_grid = 1838

# Grid of wavelengths equally spaced in log10(wavelength)
wavelength_grid_array = np.logspace(
    np.log10(wavelength_grid_min),
    np.log10(wavelength_grid_max),
    nwavelength_grid,
)

Now, we'll compute the **Resolution** in velocity terms, $v$. To this, we use the classic Doppler effect in wavelength terms.

For a moving source, the relationship between the observed wavelength ($\lambda^{'}$) and the emitted wavelength ($\lambda$) is:

|Source approaching| Source moves away |
|:-:|:-:|
|$\lambda^{'} = \lambda \left( 1-\frac{v}{c}\right)$ | $\lambda^{'} = \lambda \left( 1+\frac{v}{c}\right)$ |

We define $\Delta \lambda = \lambda_{i} - \lambda_{i-1}$ for consecutive grid points. The average of two consecutive wavelengths is:

$$\lambda_{average} = \frac{\lambda_{i} + \lambda_{i-1}}{2}$$

Then we obtain the next ratio:

$$\frac{\Delta \lambda}{\lambda_{average}} = \frac{\Delta \lambda}{\frac{\lambda_{i} + \lambda_{i-1}}{2}} = 2 \times \frac{\Delta \lambda}{\lambda_{i} + \lambda_{i-1}}$$

At low velocities, the relationship between wavelength change and velocity is approximately linear: 

$$\frac{\Delta \lambda}{\lambda_{average}} \approx \frac{v}{c}$$

Reorganizing

$$v \approx \frac{\Delta \lambda}{\lambda_{average}} \times c$$
$$v \approx 2 \times \left(\frac{\lambda_{i} - \lambda_{i-1}}{\lambda_{i} + \lambda_{i-1}}\right) \times c$$

In [None]:
# nwavelength_grid has to be tuned so that this resolution is about 200 km/s
CSPEED = 3e5 # km/s
# spacing between consecutive grid points
dwavelength = np.diff(wavelength_grid_array)
# v ~ 2 * (l_i - l_{i-1}) / (l_i + l_{i-1}) * c
res = 2 * dwavelength/(wavelength_grid_array[1:] + wavelength_grid_array[:-1]) * CSPEED

print(f'The mean resolution is: {res.mean():.2f} km/s')

In [None]:
# Plot the resolution as a function of wavelength.
# `res` has one element fewer than the grid, so it is drawn against the
# lower edge of each interval.
fig, ax = plt.subplots()
ax.plot(wavelength_grid_array[:-1], res)
ax.set_xlabel('Wavelength [Angstrom]')
ax.set_ylabel('Resolution [km/s]')
plt.show()

Reading all spectra

In [None]:
# sorted(): os.listdir returns entries in arbitrary order, but later cells
# process all_files in index ranges, which need a stable order.
all_files = sorted(files for files in os.listdir(path=PATH_INPUT) if '_raw' not in files)
print(f'The total amount of SNs to study will be: {len(all_files)}\n')

print('Some examples are: ...')
print(all_files[:5])

Now we will try with an example

In [None]:
file = all_files[0]

# The object name is the file name up to its first '.'
snname = file.partition('.')[0]
print(f'The working supernova will be {snname} ...')

# Read the table of this object
data = pd.read_csv(PATH_INPUT + file)

# log10 of the wavelength
data["log10lambda"] = np.log10(data["lambda"])

# Trick: express log10(lambda) as a number of "days" so that pandas'
# time-based rolling windows can be used on it
data["log10lambda_idx"] = [pd.Timedelta(value, 'days') for value in data["log10lambda"]]
data = data.set_index("log10lambda_idx")
data.head()

Show wavelength resolution in km/s, note that it increases with wavelength.

$$ v \approx \frac{d\lambda}{\lambda} \times c = d\ln \lambda \times c = d\log \lambda \times \ln(10) \times c$$

Then, the resolution will be

In [None]:
# |d log10(lambda)| between consecutive rows, converted to km/s
log10lambda_values = data.log10lambda.values
res = np.abs(log10lambda_values[:-1] - log10lambda_values[1:]) * np.log(10) * CSPEED
res

In [None]:
# Velocity widths of the fine and of the smoothing rolling windows
dv = 200 # km/s, using values to show the effect
dvsmooth = 2000 # km/s

# Order the rows by the pseudo-time index before the rolling windows are applied
data = data.sort_index()

# dv/c is a step in ln(lambda); dividing by ln(10) turns it into a step in
# log10(lambda), which the index stores as days -> multiply by 86400 for seconds
dlog10lambda = dv / CSPEED / np.log(10) * 24 * 3600 # pseudo seconds

We will convolve the observed spectra with a kernel whose width is the desired loglambda grid step (`dv`, in km/s) and will use a 2000 km/s smoothing kernel (`dvsmooth`) to define a continuum.

In [None]:
# Same values as in the previous code cell, repeated next to the explanation
dv = 200 # km/s, using values to show the effect
dvsmooth = 2000 # km/s

How many seconds would dlog10lambda correspond to? (remember we are fooling pandas)

In [None]:
# Width of the fine window: dv as a log10(lambda) step in pseudo-days, times 86400
dlog10lambda = dv / CSPEED / np.log(10) * 24 * 3600 # pseudo seconds
dlog10lambda

And dlog10lambdasmooth? (remember we are fooling pandas)

In [None]:
# Width of the smoothing window, computed from dvsmooth in the same way
dlog10lambdasmooth = dvsmooth / CSPEED / np.log(10) * 24 * 3600 # pseudo seconds
dlog10lambdasmooth

Plot original and smoothed versions

In [None]:
# Rolling means over the fine and the smoothing windows
flux_fine = data.flux_lambda.rolling(f'{int(dlog10lambda)}s', center=True).mean()
flux_continuum = data.flux_lambda.rolling(f'{int(dlog10lambdasmooth)}s', center=True).mean()
wavelength_values = np.array(data["lambda"])

fig, ax = plt.subplots(figsize=(10, 7))
# observed points, fine rolling mean, and the smooth continuum (thick red)
ax.plot(wavelength_values, np.array(data.flux_lambda), marker='o', alpha=0.5)
ax.plot(wavelength_values, flux_fine, alpha=0.5)
ax.plot(data["lambda"], flux_continuum, c='r', lw=4, alpha=0.5)
ax.set_xlabel(r"$\lambda$ [A]")
plt.show()

Get km/s w.r.t. FeII. 

WARNING: this assumes that the spectra is in rest frame wavelength

In [None]:
# Reference wavelength used for Fe II, in the same units as data["lambda"]
FeII = 5169
# Velocity offset of every wavelength with respect to that reference, in km/s
data["vFeII"] = (data["lambda"] - FeII) / FeII * CSPEED

In [None]:
# Same three curves as before, plus a vertical line at the Fe II wavelength
rolled_fine = data.flux_lambda.rolling(f'{int(dlog10lambda)}s', center=True).mean()
rolled_continuum = data.flux_lambda.rolling(f'{int(dlog10lambdasmooth)}s', center=True).mean()

fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(data["lambda"], data.flux_lambda, marker='o', alpha=0.5)
ax.plot(data["lambda"], rolled_fine, alpha=0.5)
ax.plot(data["lambda"], rolled_continuum, c='r', lw=4, alpha=0.5)
ax.axvline(FeII)
ax.set_xlabel(r"$\lambda$ [A]")
plt.show()

Plot dispersion w.r.t. smoothed version (use as empirical error)

In [None]:
fine_window = f'{int(dlog10lambda)}s'
smooth_window = f'{int(dlog10lambdasmooth)}s'
mean_fine = data.flux_lambda.rolling(fine_window, center=True).mean()
mean_smooth = data.flux_lambda.rolling(smooth_window, center=True).mean()

fig, ax = plt.subplots(figsize=(10, 5))
# residual of the data with respect to the fine rolling mean
ax.plot(data["lambda"], data.flux_lambda - mean_fine)
# difference between the fine rolling mean and the continuum
ax.plot(data["lambda"], (mean_fine - mean_smooth))
# rolling standard deviation of that difference, drawn as a +/- envelope
delta = (mean_fine - mean_smooth).rolling(smooth_window, center=True).std()
ax.plot(data["lambda"], -delta, c='gray')
ax.plot(data["lambda"], delta, c='gray')
ax.set_xlabel(r"$\lambda$ [A]")

plt.show()

From this point on, I will generalize all the calculations to all supernovae.

In [None]:
def smooth_flux(data:pd.DataFrame, dv:float = 200, dvsmooth:float = 2000) -> pd.DataFrame:
    """
    Smooth every spectrum of a table with rolling windows in log10(lambda).

    Params
    ------
    data: pd.DataFrame with the columns 'mjd', 'lambda' and 'flux_lambda',
        and optionally 'instrument'.
    dv: width of the fine rolling window, in km/s.
    dvsmooth: width of the smoothing (continuum) rolling window, in km/s.

    Return
    ------
    pd.DataFrame with the input rows (ordered by spectrum and wavelength, with
    a fresh integer index) plus the columns
        log10lambda                     : log10 of 'lambda'
        flux_log10lambda_rolling        : rolling mean over the fine window
        flux_log10lambda_rolling_smooth : rolling mean over the smoothing window
        eflux_log10lambda_rolling       : rolling std (smoothing window) of
                                          the difference between the two means

    A spectrum is one epoch ('mjd') of one instrument when the 'instrument'
    column is present, so epochs taken by different instruments at the same
    MJD are smoothed separately.
    """

    CSPEED = 3e5 # light_speed in km/s

    # Velocity width -> step in log10(lambda), stored as pseudo-days -> pseudo-seconds
    dlog10lambda = dv / CSPEED / np.log(10) * (24 * 3600) # pseudo seconds
    dlog10lambdasmooth = dvsmooth / CSPEED / np.log(10) * 24 * 3600 # pseudo seconds
    fine_window = f'{int(dlog10lambda)}s'
    smooth_window = f'{int(dlog10lambdasmooth)}s'

    new_columns = ["log10lambda", "flux_log10lambda_rolling",
                   "flux_log10lambda_rolling_smooth", "eflux_log10lambda_rolling"]
    group_keys = ['instrument', 'mjd'] if 'instrument' in data.columns else ['mjd']

    result = []
    for _, group in data.groupby(group_keys):
        group = group.sort_values(by='lambda', ascending=True).copy()
        # Use the group's own wavelengths: taking them from the full table
        # relied on index alignment and failed on a non-unique index.
        group["log10lambda"] = np.log10(group["lambda"])
        # fool pandas to make it think log10lambda is days
        group["log10lambda_idx"] = group["log10lambda"].apply(lambda x: pd.Timedelta(x, 'days'))
        group = group.set_index('log10lambda_idx')
        rolled_fine = group.flux_lambda.rolling(fine_window, center=True).mean()
        rolled_smooth = group.flux_lambda.rolling(smooth_window, center=True).mean()
        group["flux_log10lambda_rolling"] = rolled_fine
        group["flux_log10lambda_rolling_smooth"] = rolled_smooth
        group["eflux_log10lambda_rolling"] = (rolled_fine - rolled_smooth).rolling(smooth_window, center=True).std()
        result.append(group)

    if not result:
        # Nothing to smooth: return an empty table with the expected columns
        empty = data.iloc[0:0].copy()
        for column in new_columns:
            empty[column] = pd.Series(dtype=float)
        return empty

    return pd.concat(result).reset_index(drop=True)

In [None]:
def obtain_interpolated_flux(x, y, lambda_grid:np.array) -> np.array:
    """Interpolate the samples (x, y) onto `lambda_grid`.

    Grid points that fall outside the range of `x` are returned as NaN.
    """
    interpolator = interpolate.interp1d(x, y, bounds_error=False, fill_value=np.nan)
    return interpolator(lambda_grid)

In [None]:
def arrange_spectra(sn_name:str,data: pd.DataFrame,oid:int,
                    lambda_grid:np.array, nlambda_grid: int) -> tuple:
    """Build one table row per spectrum (instrument, epoch) of an object.

    Parameters
    ----------
    sn_name : name stored in the 'snname' column.
    data : table with the columns 'instrument', 'mjd', 'lambda',
        'flux_log10lambda_rolling', 'flux_log10lambda_rolling_smooth' and
        'eflux_log10lambda_rolling'.
    oid : identifier given to the first spectrum; it is increased by one per
        spectrum.
    lambda_grid : wavelengths onto which the fluxes are interpolated.
    nlambda_grid : value stored in the 'nlambda_grid' column.

    Returns
    -------
    (unique_table, oid) : the table with one row per spectrum, and the next
    free identifier.
    """
    results = []
    for inst_name, inst_group in data.groupby('instrument'):
        for mjd, mjd_group in inst_group.groupby('mjd'):
            # Interpolate THIS spectrum only. Using the whole table here mixed
            # every instrument and epoch of the object into each row.
            if len(mjd_group) >= 2:
                flux_lambda = obtain_interpolated_flux(x=mjd_group['lambda'], y=mjd_group['flux_log10lambda_rolling'], lambda_grid=lambda_grid)
                flux_lambda_smooth = obtain_interpolated_flux(x=mjd_group['lambda'], y=mjd_group['flux_log10lambda_rolling_smooth'], lambda_grid=lambda_grid)
            else:
                # a single point cannot be interpolated: store an all-NaN spectrum
                flux_lambda = np.full(np.shape(lambda_grid), np.nan)
                flux_lambda_smooth = np.full(np.shape(lambda_grid), np.nan)

            data_flux = {
                'oid': oid,
                'snname':sn_name,
                'instrument': inst_name,
                'mjd': mjd,
                'lambda_grid_min': lambda_grid.min(),
                'lambda_grid_max': lambda_grid.max(),
                'nlambda_grid': nlambda_grid,
                'lambda_data_min': mjd_group['lambda'].min(),
                'lambda_data_max': mjd_group['lambda'].max(),
                'flux_lambda': flux_lambda,
                'flux_lambda_smooth': flux_lambda_smooth,
                # NOTE(review): the error is stored at the original sampling of
                # the spectrum, not on lambda_grid like the fluxes — confirm
                # that this is intended.
                'e_flux_lambda': mjd_group.eflux_log10lambda_rolling.tolist(),
                }
            results.append(data_flux)

            oid += 1

    unique_table = pd.DataFrame(results)
    return unique_table, oid

In [None]:
# Try the smoothing on the first table
file = all_files[0]
data = pd.read_csv(PATH_INPUT+file)
data = smooth_flux(data=data)

In [None]:
# Build the master table for one slice of files, skipping files that fail.
indx_ini = 14000
indx_fin = 18000
# NOTE(review): oid starts at the file index but grows by one per spectrum,
# so identifiers of consecutive slices may overlap — confirm before merging.
oid = indx_ini

result_tables = []
failed_files = []
for file in all_files[indx_ini:indx_fin]:
    # `except Exception` (not a bare except) so Ctrl-C still stops the loop;
    # failures are recorded instead of being silently dropped.
    try:
        sn_name = file.split('.')[0]
        data = pd.read_csv(PATH_INPUT+file)
        data = smooth_flux(data=data)
        result_table, oid = arrange_spectra(sn_name,data,oid, wavelength_grid_array, nwavelength_grid)
    except Exception as exc:
        failed_files.append((file, repr(exc)))
        continue
    result_tables.append(result_table)

# concatenate once at the end instead of once per file
master_dataframe = pd.concat(result_tables) if result_tables else pd.DataFrame()
print(f'{len(failed_files)} file(s) could not be processed')

In [None]:
# Build the master table for one slice of files; any failure stops the loop.
indx_ini = 6000
indx_fin = 10000
# NOTE(review): oid starts at the file index but grows by one per spectrum,
# so identifiers of consecutive slices may overlap — confirm before merging.
oid = indx_ini

result_tables = []
for file in all_files[indx_ini:indx_fin]:
    sn_name = file.split('.')[0]
    data = pd.read_csv(PATH_INPUT+file)
    data = smooth_flux(data=data)
    result_table, oid = arrange_spectra(sn_name,data,oid, wavelength_grid_array, nwavelength_grid)
    result_tables.append(result_table)

# concatenate once at the end instead of once per file
master_dataframe = pd.concat(result_tables) if result_tables else pd.DataFrame()

In [None]:
# Save this slice; the file name carries the index range that was processed
master_dataframe.to_pickle(f'./master_spectra_table_{indx_ini}_{indx_fin}.pkl')

In [None]:
# Read the slice back (pickle files must come from a trusted source)
master_dataframe = pd.read_pickle(f'./master_spectra_table_{indx_ini}_{indx_fin}.pkl')
master_dataframe

In [None]:
# Load a previously saved master table and drop its first column
# NOTE(review): presumably that column is a leftover index — confirm
master_dataframe = pd.read_pickle(f'./spectra_wisrep_20240622.pkl')
master_dataframe = master_dataframe.drop(master_dataframe.columns[0], axis=1)

In [None]:
# Display the master table
master_dataframe

In [None]:
# Inspect the stored flux of the first row
sn_data = master_dataframe.iloc[0]
test = sn_data['flux_lambda']
test

In [None]:
# Plot the gridded flux of the first spectrum of the master table.
sn_data = master_dataframe.iloc[0]
# Rebuild the wavelength grid from the limits and size stored in the row
x = np.logspace(np.log10(sn_data.lambda_grid_min),np.log10(sn_data.lambda_grid_max),int(sn_data.nlambda_grid))
y = sn_data.flux_lambda
fig, ax = plt.subplots()
# `sn_data` is a single row holding an array in 'flux_lambda', so the curve
# is drawn with matplotlib directly rather than with Series.plot.
ax.plot(x, y)
ax.set_xlabel('lambda')
ax.set_ylabel('flux_lambda')
plt.show()

In [None]:
# Number of distinct objects in the master table
len(master_dataframe.snname.unique())

In [None]:
import psycopg2
import requests
# Read-only ALeRCE database credentials published in the alercebroker/usecases repository.
credentials_file = "https://raw.githubusercontent.com/alercebroker/usecases/master/alercereaduser_v4.json"
# A timeout keeps the kernel from hanging on a stalled request, and
# raise_for_status reports an HTTP failure instead of a confusing JSON decoding error.
credentials_response = requests.get(credentials_file, timeout=30)
credentials_response.raise_for_status()
params = credentials_response.json()["params"]
conn = psycopg2.connect(dbname=params["dbname"], user=params["user"], host=params["host"], password=params["password"])

In [None]:
def ztf_crossmatch(conn, df, search_radius=1):
    '''
    Positional crossmatch of an external catalog against the ALeRCE `object` table.

    conn: connection to database (handed unchanged to pandas.read_sql)
    df: external catalog dataframe with columns id_source, ra, dec,
        redshift and true_label. ra and dec are written into the query as-is,
        so they must be numeric (presumably degrees, as q3c expects -- TODO confirm).
    search_radius: crossmatch radius in arcsec (default=1)

    The output is a dataframe with the catalog source_id, ra, dec, redshift and
    true_label, the ALeRCE oid, meanra, meandec and firstmjd, and the crossmatch
    distance in arcsec (column dist_arcsec).
    An empty dataframe is returned when df has no rows. When the query fails the
    error is printed and None is returned.
    '''

    # An empty catalog would produce an invalid `VALUES` clause; answer without querying.
    if len(df) == 0:
        return pd.DataFrame(columns=["source_id", "ra", "dec", "oid", "meanra", "meandec",
                                     "dist_arcsec", "firstmjd", "redshift", "true_label"])

    def sql_text(value):
        # Render a value as an SQL string literal; embedded single quotes are doubled
        # so that names containing an apostrophe cannot break (or alter) the query.
        return "'" + str(value).replace("'", "''") + "'"

    # Prepare the catalog rows for the VALUES clause
    objects = []
    for _, row in df.iterrows():
        objects.append(f"({sql_text(row.id_source)}, {row.ra}, {row.dec}, {sql_text(row.redshift)}, {sql_text(row.true_label)})")
    objects_str = ",\n".join(objects)

    # Convert the radius from arcsec to degrees
    search_radius = search_radius / 3600

    # Prepare the query
    query = """
    WITH catalog (source_id, ra, dec, redshift, true_label) AS (
        VALUES
            {values}
    )
    SELECT 
        c.source_id, c.ra, c.dec, o.oid, o.meanra, o.meandec, q3c_dist(c.ra, c.dec, o.meanra, o.meandec), 
        o.firstmjd,
        c.redshift, c.true_label
    FROM object o, catalog c
    WHERE
        q3c_join(c.ra, c.dec, o.meanra, o.meandec, {radius})
    """

    # Format the final query
    query_str = query.format(values=objects_str, radius=search_radius)

    # Run the query
    try:
        matches = pd.read_sql(query_str,conn)
        # q3c_dist is multiplied by 3600 (degrees -> arcsec) and renamed accordingly
        matches["q3c_dist"] = matches.q3c_dist * 3600
        return matches.rename({"q3c_dist": "dist_arcsec"}, axis=1)
    except Exception as error:
        print("Error accessing the database. Most common causes are timeout errors or wrongly formatted input query.")
        # Show the underlying cause instead of discarding it.
        print(f"Original error: {error!r}")
        return None

In [4]:
# Load the WISeREP spectra metadata; low_memory=False makes pandas read the file in one pass.
supernovae_metadata = pd.read_csv('./wiserep_spectra_metadata.csv',low_memory=False)

In [5]:
# Name lookup table: WISeREP object id, IAU name and the first entry of the
# comma-separated internal-name list (missing values are left untouched).
supernovae_cross_names = pd.concat(
    [
        supernovae_metadata['Obj. ID'].rename('Obj. ID'),
        supernovae_metadata['IAU name'].rename('IAU name'),
        supernovae_metadata['Internal name/s']
        .apply(lambda x: x if pd.isnull(x) else str(x).split(',')[0].strip())
        .rename('Internal name'),
    ],
    axis=1,
)
supernovae_cross_names

Unnamed: 0,Obj. ID,IAU name,Internal name
0,5875,SN 1999Z,
1,5875,SN 1999Z,
2,5875,SN 1999Z,
3,5875,SN 1999Z,
4,5875,SN 1999Z,
...,...,...,...
46446,8896,SN 2016hhv,ATLAS16drn
46447,8896,SN 2016hhv,ATLAS16drn
46448,205,SN 2016hht,PS17aja
46449,427,SN 2016hhj,iPTF16hhj


In [None]:
# Select the metadata columns and rename them to the names ztf_crossmatch reads, then run the crossmatch.
wiserep_to_crossmatch_columns = {
    "IAU name": "id_source",
    "Obj. RA": "ra",
    "Obj. DEC": "dec",
    "Redshift": "redshift",
    "Obj. Type": "true_label",
}
dfsn = supernovae_metadata[list(wiserep_to_crossmatch_columns)].rename(columns=wiserep_to_crossmatch_columns)
results = ztf_crossmatch(conn, dfsn)
results

In [None]:
# Cache the crossmatch result on disk so the database query does not have to be repeated.
results.to_pickle('results_wiserep.pkl')

In [None]:
# The literal string 'nan' in source_id is turned into a real missing value.
results['source_id'] = results['source_id'].replace({'nan': np.nan})


In [None]:
# Drop every row that has a missing value in any column, then display what is left.
results = results.dropna()
results

In [None]:
# Copy of the metadata indexed by IAU name, for lookups by supernova name.
test_supernovae_metadata = supernovae_metadata.set_index('IAU name')
test_supernovae_metadata

In [None]:
# Metadata rows of the crossmatched supernovae. The names live in the index
# ('IAU name'), so rows are selected with a mask on the index; plain `df[...]`
# with a Series of names would look them up as column labels and fail.
test_supernovae_metadata.loc[test_supernovae_metadata.index.isin(results.source_id)]

In [2]:
# Reload the cached crossmatch, keep the first match per source and put the columns in their final order.
results = pd.read_pickle('results_wiserep.pkl')
new_results = (
    results
    .drop_duplicates(subset='source_id', keep='first')
    .reset_index(drop=True)
    .loc[:, ['oid', 'source_id', 'true_label', 'firstmjd', 'redshift', 'ra', 'dec', 'meanra', 'meandec']]
)

In [None]:
# Save the de-duplicated crossmatch table and display it.
new_results.to_pickle('ALERCExWiserp20240721_to_AvMW.pkl')
new_results

In [6]:
# One row per WISeREP object id (the first occurrence is kept), with a fresh 0..n-1 index.
new_SN_cross_names = (
    supernovae_cross_names
    .drop_duplicates(subset=['Obj. ID'])
    .reset_index(drop=True)
)
new_SN_cross_names

Unnamed: 0,Obj. ID,IAU name,Internal name
0,5875,SN 1999Z,
1,6732,SN 1999X,
2,5821,SN 1999U,
3,1885,SN 1999S,
4,1545,SN 1999Q,
...,...,...,...
19924,935,SN 2016hid,PS16exz
19925,3311,SN 2016hhz,Gaia16cae
19926,8896,SN 2016hhv,ATLAS16drn
19927,205,SN 2016hht,PS17aja


In [7]:
# Join the ALeRCE matches with the WISeREP names on the IAU name, keep the naming
# columns plus the ZTF oid, and strip the spaces from source_id ("SN 1990E" -> "SN1990E").
crossmatched = new_results.merge(new_SN_cross_names, how='inner', left_on='source_id', right_on='IAU name')
crossmatched = crossmatched[['source_id', 'IAU name', 'Internal name', 'oid']]
crossmatched['source_id'] = crossmatched['source_id'].str.replace(' ', '', regex=False)
crossmatched

Unnamed: 0,source_id,IAU name,Internal name,oid
0,SN1990E,SN 1990E,,ZTF22abqtedd
1,SN2024yr,SN 2024yr,PS24ara,ZTF24aaekmnz
2,SN2024yq,SN 2024yq,ZTF24aabvtdh,ZTF24aabvtdh
3,SN2024ym,SN 2024ym,ZTF24aabplhb,ZTF24aabplhb
4,2024xx,2024xx,ATLAS24auj,ZTF24aabuisb
...,...,...,...,...
9720,SN2021tgb,SN 2021tgb,ATLAS21bbue,ZTF21abljmmv
9721,SN2021tfx,SN 2021tfx,ZTF21abkrteb,ZTF21abkrteb
9722,SN2016nx,SN 2016nx,PS16pb,ZTF22abntkbk
9723,SN2006nz,SN 2006nz,,ZTF18abshjwa


In [8]:
# Load the merged master spectra table.
master_dataframe = pd.read_pickle('master_spectra_table_main.pkl')

In [9]:
# List the columns of the master table.
master_dataframe.columns

Index(['oid', 'snname', 'instrument', 'mjd', 'lambda_grid_min',
       'lambda_grid_max', 'nlambda_grid', 'lambda_data_min', 'lambda_data_max',
       'flux_lambda', 'flux_lambda_smooth', 'e_flux_lambda'],
      dtype='object')

In [10]:
# Attach the ZTF object id to every spectrum, matching `snname` first against the
# IAU name (source_id, spaces removed) and then against the internal name.
# Both frames carry an `oid` column, so after this merge the table holds
# oid_x (the running counter of the master table) and oid_y (ZTF id found via the IAU name).
cross_master = master_dataframe.merge(crossmatched[['source_id','oid']], how='left', left_on='snname',right_on='source_id').drop('source_id', axis=1)
# The left side no longer has a plain `oid`, so this merge adds one unsuffixed:
# the ZTF id found via the internal name.
cross_master = cross_master.merge(crossmatched[['Internal name','oid']], how='left', left_on='snname',right_on='Internal name', suffixes=('', '_new')).drop('Internal name',axis=1)
# Prefer the IAU-name match and fall back to the internal-name match. Assigning
# oid_y alone would overwrite (and so discard) every internal-name match.
cross_master['oid'] = cross_master['oid_y'].fillna(cross_master['oid'])
cross_master = cross_master.drop(['oid_x','oid_y'],axis=1)
cross_master = cross_master[['oid','snname', 'instrument', 'mjd', 'lambda_grid_min', 'lambda_grid_max',
      'nlambda_grid', 'lambda_data_min', 'lambda_data_max', 'flux_lambda', 'flux_lambda_smooth',
      'e_flux_lambda']]
# Keep only the first two '_'-separated parts of the instrument label.
cross_master['instrument'] = cross_master['instrument'].str.split('_').str[:2].str.join('_')
cross_master

Unnamed: 0,oid,snname,instrument,mjd,lambda_grid_min,lambda_grid_max,nlambda_grid,lambda_data_min,lambda_data_max,flux_lambda,flux_lambda_smooth,e_flux_lambda
0,,SN2022pru,Lick-3m_KAST,59788.215972,3206.34,10915.01,1838,3504.527001,10393.131809,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.21898910712578754, 0.1979063641775711, 0.18..."
1,,SN2022ytx,LT_SPRAT,59886.979942,3206.34,10915.01,1838,4047.600000,7994.400000,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.2513674591903225, 0.2889965846960843, 0.113..."
2,,SN2022ytx,LT_SPRAT,59907.898866,3206.34,10915.01,1838,4066.000000,7994.400000,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.12373791200226614, 0.10053409443820172, 0.1..."
3,,SNLS-07D3do,Gemini-N_GMOS,54200.000000,3206.34,10915.01,1838,5104.440000,9352.610000,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1.5368479599410815e-18, 1.394673534721022e-18..."
4,,SNLS-07D3do,Gemini-N_GMOS,54200.000000,3206.34,10915.01,1838,5048.346943,9355.344674,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1.797687567656606e-18, 1.8212694935234954e-18..."
...,...,...,...,...,...,...,...,...,...,...,...,...
25948,ZTF21aanvncv,SN2021efd,NOT_ALFOSC,59277.154005,3206.34,10915.01,1838,3399.900000,9675.765091,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[9.342260744967236e-17, 8.189705456575588e-17,..."
25949,ZTF20aauoipy,SN2020etk,LT_SPRAT,58970.392257,3206.34,10915.01,1838,4020.000000,7994.400000,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.44910237291240807, 0.34667301062037215, 0.2..."
25950,ZTF20abkwvgc,SN2020oqp,P60_SEDM,59047.214850,3206.34,10915.01,1838,3776.700000,9223.300000,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
25951,,SDSS-SN-082,Sloan_SDSS-Spec,54233.000000,3206.34,10915.01,1838,3822.082000,9189.671900,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1.1354836383504999e-17, 1.118140824452677e-17..."


In [11]:
# Save the spectra table with the ZTF object ids attached.
cross_master.to_pickle('spectra_ALeRCE20240801_x_wisrep_20240622.pkl')

In [None]:
# Load a saved cross-matched spectra table for inspection.
# NOTE(review): this file name carries ALeRCE20240630, while the preceding cell
# writes ALeRCE20240801 -- confirm which file is meant to be read here.
hola = pd.read_pickle('spectra_ALeRCE20240630_x_wisrep_20240622.pkl')
hola

In [None]:
# Write one CSV per instrument for the spectrum table currently held in `data`
# (`data` and `sn_name` come from whichever cell was run last).
data_instrument = data.groupby('instrument')
# The loop variable must not be called `int`: in a notebook it would stay in the
# kernel namespace and break every later call to the builtin int().
for instrument_name, group in data_instrument:
    group.to_csv(f'{sn_name}_{instrument_name}.csv')

In [None]:
sn_name = 'SN2006dv.dat'
data = pd.read_csv('./data/spectra/'+sn_name)

x_lambda_array = data['lambda']
y = data['flux_lambda']
# fill_value is only used when bounds_error=False; with the default setting an
# out-of-range wavelength raises ValueError instead of returning NaN.
f = interpolate.interp1d(x_lambda_array, y, bounds_error=False, fill_value=np.nan)

def plotobject(df,sn_name, inst_name):
  """Plot every spectrum of one supernova/instrument pair on a single figure.

  df: table with columns 'mjd', 'lambda' and 'flux_lambda'; one curve is drawn per distinct mjd
  sn_name: supernova name; the text before the first '.' is used in the title
  inst_name: instrument name; the text before the first '.' is used in the title

  Nothing is returned; the figure is left to the notebook to display.
  """
  fig, ax = plt.subplots(figsize=(10, 6))
  for mjd_date, group_df in df.groupby('mjd'):
       ax.plot(group_df['lambda'], group_df['flux_lambda'], label=f'{mjd_date:.2f} MJD')
  ax.legend(frameon=False)
  ax.set_xlabel('Lambda')
  ax.set_ylabel('Flux_lambda')
  ax.set_title('Supernova:'+sn_name.split(".")[0]+" | Instrument: "+inst_name.split(".")[0])
  

# One figure per instrument that observed this supernova.
data_instrument = data.groupby('instrument')
for inst_name, group in data_instrument:
    plotobject(group, sn_name=sn_name, inst_name=inst_name)

In [None]:
# Build the master spectra table for the files in all_files[indx_ini:indx_fin].
# NOTE(review): this cell was brought in line with the other build loops of the
# notebook: PATH_INPUT (it read `PATH_input`), the smooth_flux step, and the
# wavelength-grid arguments of arrange_spectra. Confirm that is the intended pipeline.
master_dataframe = pd.DataFrame()
indx_ini = 0
indx_fin = 12000
oid = 11000 - 1  # starting value of the running counter returned by arrange_spectra
result_tables = []  # per-file tables, concatenated once after the loop
for file in all_files[indx_ini:indx_fin]:
    sn_name = file.split('.')[0]
    data = pd.read_csv(PATH_INPUT+file)
    data = smooth_flux(data=data)
    result_table, oid = arrange_spectra(sn_name, data, oid, wavelength_grid_array, nwavelength_grid)
    result_tables.append(result_table)

if result_tables:
    master_dataframe = pd.concat(result_tables)

In [None]:
# Save the slice as CSV text.
# NOTE(review): the extension is spelled `.cvs`; the cell under "Working with the
# DataFrame" reads a `.cvs` file as well, so rename both together if this is corrected.
master_dataframe.to_csv(f'./master_spectra_table_{indx_ini}_{indx_fin}.cvs')

In [None]:
import pandas as pd
import os

In [None]:
# Pickled per-slice tables to merge. Only `.pkl` files are listed because the
# merge step reads them with read_pickle, and the merged output itself is left
# out so that re-running the notebook does not merge it into itself.
master_dataframe_list = sorted(
    file for file in os.listdir(path='./')
    if file.startswith("master_spectra")
    and file.endswith(".pkl")
    and file != 'master_spectra_table_main.pkl'
)
master_dataframe_list

In [None]:
# Merge every per-slice table into one dataframe (single concat instead of growing it file by file).
master_dataframe = pd.DataFrame()
slice_tables = []
for master_spectra_table in master_dataframe_list:
    data = pd.read_pickle(master_spectra_table)
    slice_tables.append(data)

if slice_tables:
    master_dataframe = pd.concat(slice_tables)

In [None]:
# Save the merged table; this is the file later cells load as the master spectra table.
master_dataframe.to_pickle(f'./master_spectra_table_main.pkl')

In [None]:
# Reload the merged master spectra table from disk.
master_dataframe = pd.read_pickle('master_spectra_table_main.pkl')

In [None]:
# (rows, columns) of the merged table.
master_dataframe.shape

## Working with the DataFrame

In [None]:
# Load the CSV copy of the master table and drop the two index columns left over from earlier saves.
master_dataframe = (
    pd.read_csv('master_spectra_table_main.cvs')
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
)
master_dataframe.head()

In [None]:
# All rows of the master table that belong to SN2021rfs.
sn_dataframe = master_dataframe.loc[master_dataframe['snname'].eq('SN2021rfs')]
sn_dataframe

In [None]:
sn_name = 'SN2021rfs.dat'
data = pd.read_csv('./data/spectra/' + sn_name)


def plotobject(df,sn_name, inst_name):
    """Draw all spectra of one supernova/instrument pair, one curve per MJD.

    df: table with columns 'mjd', 'lambda' and 'flux_lambda'
    sn_name, inst_name: labels for the title; only the text before the first '.' is shown
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    for epoch, spectrum in df.groupby('mjd'):
        ax.plot(spectrum['lambda'], spectrum['flux_lambda'], label=f'{epoch:.2f} MJD')
    ax.legend(frameon=False)
    ax.set_xlabel('Lambda')
    ax.set_ylabel('Flux_lambda')
    supernova_label = sn_name.split(".")[0]
    instrument_label = inst_name.split(".")[0]
    ax.set_title(f'Supernova:{supernova_label} | Instrument: {instrument_label}')


# One figure per instrument found in the file.
data_instrument = data.groupby('instrument')
for inst_name, group in data_instrument:
    plotobject(group, sn_name=sn_name, inst_name=inst_name)

In [None]:
# NOTE(review): `lambda_grid_lenght` is not defined in any cell visible to this
# review, and flux is passed as the x column -- confirm the intended x/y before relying on this plot.
fig, ax = plt.subplots()
sn_dataframe.plot(x='flux_lambda',y=lambda_grid_lenght)


## Importing AstroDash

In [None]:
import tensorflow as tf

In [None]:
class ConvNetLayer(object):
    """Helpers that create the pieces of a convolutional network with TensorFlow.

    The constructor only stores its four arguments; of those, the methods below
    read `imWidthReduc` (size of the flattened pooling output) and `ntypes`
    (width of the readout layer).

    NOTE(review): `tf.placeholder` and `tf.truncated_normal` are used as top-level
    names -- confirm the installed TensorFlow version still provides them.
    """

    def __init__(self, N, ntypes, imWidth, imWidthReduc):
        self.N, self.ntypes = N, ntypes
        self.imWidth, self.imWidthReduc = imWidth, imWidthReduc

    def build_layer(self, prevHPool, prevNumFeatures, numFeatures):
        """Apply a 5x5 convolution + bias + ReLU to `prevHPool`, then 2x2 max pooling."""
        kernel = self._weight_variable([5, 5, prevNumFeatures, numFeatures])
        bias = self._bias_variable([numFeatures])
        activated = tf.nn.relu(self._conv2d(prevHPool, kernel) + bias)
        return self._max_pool_2x2(activated)

    def connect_layers(self, h_pool, numFeatures, layerNum):
        """Flatten `h_pool` and pass it through a 1024-unit dense layer with ReLU."""
        # Length of one flattened sample, derived from the reduced width and the layer number.
        flat_size = int(self.imWidthReduc / layerNum * self.imWidthReduc / layerNum * numFeatures)
        weights = self._weight_variable([flat_size, 1024])
        bias = self._bias_variable([1024])
        flattened = tf.reshape(h_pool, [-1, flat_size])
        return tf.nn.relu(tf.matmul(flattened, weights) + bias)

    def dropout(self, h_fc):
        """Return `(keep_prob, dropped)`: a float32 placeholder and `h_fc` after tf.nn.dropout."""
        keep_prob = tf.placeholder(tf.float32)
        return keep_prob, tf.nn.dropout(h_fc, keep_prob)

    def readout_layer(self):
        """Create and return the `(weights, bias)` of the 1024 -> ntypes readout layer."""
        weights = self._weight_variable([1024, self.ntypes])
        bias = self._bias_variable([self.ntypes])
        return weights, bias

    def _weight_variable(self, shape):
        # Variable initialised from a truncated normal with stddev 0.1.
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def _bias_variable(self, shape):
        # Variable initialised to the constant 0.1.
        return tf.Variable(tf.constant(0.1, shape=shape))

    def _conv2d(self, x, W):
        # Stride-1 convolution with 'SAME' padding.
        unit_strides = [1, 1, 1, 1]
        return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

    def _max_pool_2x2(self, x):
        # 2x2 max pooling with stride 2 and 'SAME' padding.
        window = [1, 2, 2, 1]
        return tf.nn.max_pool(x, ksize=window, strides=[1, 2, 2, 1], padding='SAME')


# Other things

In [None]:
import os
# Number of entries in the raw WISeREP spectra folder.
len(os.listdir(path='./data/spectra_wiserep_raws'))