_Authors: Andreia Dourado, Bruno Moraes_

_Adapted from Melissa Graham's example notebook._

__Description: This is a first notebook to create a training set of galaxies using a random sample from the Rubin Science Platform DP0.2 data__


In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tables_io
import qp
import sys 
from matplotlib import gridspec

### 1. Reading the data

In [None]:
# Only the identifier, position, redshift and the six true-magnitude columns
# are read from the random-sample CSV; any other column in the file is ignored.
catalog_columns = [
    'objectId', 'ra', 'dec', 'redshift',
    'u_Mag', 'g_Mag', 'r_Mag', 'i_Mag', 'z_Mag', 'y_Mag',
]
data = pd.read_csv("random_data.csv", usecols=catalog_columns)

In [None]:
# Display the catalogue as read from the CSV.
data

### 2. Magnitude uncertainties

The photometric error model was adapted from Melissa Graham's notebook and follows Section 3.2.1 of Ivezić et al. (2019):

$$ \sigma_{\rm rand}^2 = (0.04 - \gamma)x + \gamma x^2 $$

where $x = 10^{0.4(m - m_5)}$, $m_5$ is the $5\sigma$ limiting magnitude, $m$ is the galaxy magnitude, and $\gamma$ takes the per-filter values fixed below for the LSST bands.


In [None]:
# Per-filter inputs of the photometric error model, kept together so each band's
# values stay aligned:  band -> (m5, gamma)
#   m5    : 5-sigma limiting magnitude used in x = 10^(0.4 (m - m5))
#   gamma : coefficient of the sigma_rand^2 = (0.04 - gamma) x + gamma x^2 model
lsst_band_params = {
    'u': (26.09, 0.037),
    'g': (27.38, 0.038),
    'r': (27.53, 0.039),
    'i': (26.83, 0.039),
    'z': (26.06, 0.04),
    'y': (24.86, 0.04),
}

# Parallel lists (same band order) consumed by the cells below.
bands = list(lsst_band_params)
m5 = [params[0] for params in lsst_band_params.values()]
gamma = [params[1] for params in lsst_band_params.values()]

In [None]:
# Photometric uncertainty per band:
#   sigma_rand^2 = (0.04 - gamma) * x + gamma * x^2,   x = 10^(0.4 * (m - m5))
# The thresholds are applied with label-agnostic Series operations. The previous
# version fed positional indices from np.where() into the label-based .loc,
# which is only correct while `data` keeps its default RangeIndex.
ERR_FLOOR = 0.005    # uncertainties below this value are raised to it
ERR_CEILING = 0.2    # uncertainties above this value are flagged as NaN (not to be used)

for filt, filt_m5, filt_gamma in zip(bands, m5, gamma):
    x = np.power(10, 0.4 * (data[filt + '_Mag'] - filt_m5))
    mag_err = np.sqrt((0.04 - filt_gamma) * x + filt_gamma * x**2)

    # Uncertainty floor (NaN entries, if any, are left as NaN by clip).
    mag_err = mag_err.clip(lower=ERR_FLOOR)

    # Galaxies with error > ERR_CEILING are not to be used: mark them as NaN.
    data['mag_err_' + filt + '_lsst'] = mag_err.mask(mag_err > ERR_CEILING)
  

In [None]:
# Display the catalogue, now including the mag_err_<band>_lsst columns.
data

### 3. Generate observed magnitudes

For each galaxy and band, a random value is drawn from a normal distribution with zero mean and a standard deviation equal to the magnitude uncertainty, and added to the true magnitude to obtain the observed magnitude.

In [None]:
# Seeded generator: re-running this cell (or Restart & Run All) reproduces the
# same scatter, so the saved training set is reproducible.
SEED = 42
rng = np.random.default_rng(SEED)

for filt in bands:
    true_mag = data[filt + '_Mag']
    mag_err = data['mag_err_' + filt + '_lsst']

    # Observed magnitude = true magnitude + N(0, 1) draw scaled by the uncertainty.
    # Where the uncertainty is NaN, the product and the sum are NaN as well, so
    # the observed magnitude of those galaxies is NaN without any extra masking.
    data['mag_' + filt + '_lsst'] = true_mag + rng.standard_normal(len(data)) * mag_err

In [None]:
# Display the catalogue, now including the observed mag_<band>_lsst columns.
data

### 4. Saving the file

In [None]:
# Columns kept in the training set: identifiers and redshift first, then the
# observed magnitudes and their uncertainties (each group in u, g, r, i, z, y
# order). The true '<band>_Mag' columns are left out.
id_columns = ['objectId', 'ra', 'dec', 'redshift']
mag_columns = [
    'mag_u_lsst', 'mag_g_lsst', 'mag_r_lsst',
    'mag_i_lsst', 'mag_z_lsst', 'mag_y_lsst',
]
mag_err_columns = [
    'mag_err_u_lsst', 'mag_err_g_lsst', 'mag_err_r_lsst',
    'mag_err_i_lsst', 'mag_err_z_lsst', 'mag_err_y_lsst',
]
data_trainig_set = data[id_columns + mag_columns + mag_err_columns]

In [None]:
# Display the selected training-set columns before writing them to disk.
data_trainig_set

In [None]:
# Write the training set to CSV in the current working directory.
# NOTE(review): the file name is spelled 'traning_set.csv'; kept as-is because
# other notebooks may read it under this name -- confirm before renaming.
# NOTE(review): to_csv() is called with its defaults, so the DataFrame index is
# written as an extra unnamed first column -- confirm whether readers expect it.
output_csv = 'traning_set.csv'
data_trainig_set.to_csv(output_csv)