# Criação do training set
_Autores: Andreia Dourado, Bruno Moraes_

_Adaptado do notebook de exemplo feito por Melissa Graham._

__Descrição: Criação de um training set com um conjunto de galáxias obtido a partir do cross-matching entre uma seleção aleatória de galáxias da tabela Truth DP02 e os objetos da tabela Object DP02, a partir das skinny tables com correção de avermelhamento e flag "detect_isPrimary" aplicadas.__


### 1. Importando as bibliotecas

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import gridspec

### 2. Lendo os dados

In [None]:
# Load the input catalogue from a CSV file into a DataFrame.
# NOTE: the path is absolute and points into the author's home directory.
dataset = pd.read_csv("/home/andreia.dourado/TCC/dp02/truth/data/dp02_truth_lsst_error_model_Y5.csv")

In [None]:
# Display the loaded table as the cell output.
dataset

In [None]:
# List the column names of the loaded table.
dataset.columns

In [None]:
bands = ['u', 'g', 'r', 'i', 'z', 'y']
# Start from the full table and narrow it one band at a time, keeping only
# rows whose magnitude in that band is below 60 (rows where the comparison is
# False, including NaN magnitudes, are dropped).
# NOTE(review): 60 presumably excludes sentinel/non-detection values -- confirm.
data = dataset
for band in bands:
    data = data.loc[data[f'mag_{band}dp02_object'].lt(60)]

In [None]:
# Display the rows that survived the per-band magnitude filter.
data

### 3. Analisando o conjunto de dados

#### 3.1 Funções

In [None]:
def plot_errors(catalog, title='Errors', gridsize=[400, 200], bins='log', cmap='inferno', xlim=[20, 30], ylim=[0, 100], sigma=0, pop='n', save=0):
    """Draw a 3x2 grid of S/N-versus-magnitude hexbin panels, one per band.

    Parameters
    ----------
    catalog : column-indexable table (presumably a pandas DataFrame)
        Must provide ``mag_{band}dp02_object`` and ``magerr_{band}dp02_object``
        for each of the bands u, g, r, i, z, y.
    title : str
        Figure-level title.
    gridsize, bins, cmap
        Forwarded unchanged to ``Axes.hexbin``.
    xlim, ylim
        Axis limits applied to every panel.
    sigma : int
        When different from 0, the figure is also written to the
        ``{sigma}sigma`` output directory.
    pop
        Accepted but not used by this function.
    save
        When equal to the string ``'save'``, the figure is written to the
        ``xmatch`` output directory.
    """
    figure, panel_grid = plt.subplots(3, 2, figsize=[12, 16])
    figure.suptitle(title, fontsize=16)

    for panel, band in zip(panel_grid.flatten(), ('u', 'g', 'r', 'i', 'z', 'y')):
        magnitudes = np.array(catalog[f'mag_{band}dp02_object'])
        mag_errors = np.array(catalog[f'magerr_{band}dp02_object'])
        # Invert err = 2.5*log10(1 + 1/SN) to turn the magnitude error into S/N.
        signal_to_noise = 1 / (10 ** (0.4 * mag_errors) - 1)

        panel.hexbin(magnitudes, signal_to_noise, gridsize=gridsize, cmap=cmap, bins=bins, mincnt=1)
        panel.set_xlabel(f"mag {band}", fontsize=14)
        panel.set_ylabel("S/N", fontsize=14)
        panel.set_ylim(ylim)
        panel.set_xlim(xlim)
        # Reference line marking S/N = 5.
        panel.axhline(5, color='black', label='5σ', linestyle='--')
        panel.grid(True, linestyle='--', alpha=0.6)
        panel.legend(fontsize=12)

    # Leave the top 3% of the canvas free for the suptitle.
    figure.tight_layout(rect=[0, 0, 1, 0.97])

    if save == 'save':
        plt.savefig('/home/andreia.dourado/TCC/dp02_truth_obj/xmatch/erros_log.png')
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/{sigma}sigma/erros_log_{sigma}sigma.png')
    plt.show()

In [None]:
def errors(catalog, sigma=0, pop='',save=0):
    """Scatter-plot magnitude error against magnitude for the u, g, r, i bands.

    The panels occupy the first four slots of a 1x5 subplot row.

    Parameters
    ----------
    catalog : column-indexable table (presumably a pandas DataFrame)
        Must provide ``mag_{band}dp02_object`` and ``magerr_{band}dp02_object``
        for u, g, r and i.
    sigma : int
        When different from 0, the figure is also written to the
        ``{sigma}sigma`` output directory.
    pop
        Accepted but not used by this function.
    save
        When equal to the string ``'save'``, the figure is written to the
        ``xmatch`` output directory.

    Notes
    -----
    The figure is always written to ``/home/andreia.dourado/erros_pop.png``,
    regardless of ``save`` and ``sigma``.
    NOTE(review): that unconditional write looks like a leftover -- confirm.
    """
    plt.figure(figsize=[18,4])
    for slot, band in enumerate(('u', 'g', 'r', 'i'), start=1):
        # Same axes as the three-digit shorthand 151, 152, ...
        plt.subplot(1, 5, slot)
        plt.plot(
            catalog[f'mag_{band}dp02_object'],
            catalog[f'magerr_{band}dp02_object'],
            '.',
            alpha=0.3,
            color='steelblue',
        )
        plt.xlabel(f'mag {band}')
        # Only the left-most panel carries the shared y-axis label.
        if slot == 1:
            plt.ylabel('error')
        plt.tight_layout()

    if save == 'save':
        plt.savefig('/home/andreia.dourado/TCC/dp02_truth_obj/xmatch/erros.png')
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/{sigma}sigma/erros_{sigma}sigma.png')
    plt.savefig('/home/andreia.dourado/erros_pop.png')

In [None]:
def mag_histogram(catalog, title='DP0.2', sigma=0,pop='',save=0):
    """Plot a log-scaled magnitude histogram per band on a 3x2 grid.

    Parameters
    ----------
    catalog : column-indexable table (presumably a pandas DataFrame)
        Must provide ``mag_{band}dp02_object`` for u, g, r, i, z and y.
    title : str
        Figure-level title.
    sigma : int
        When different from 0, the figure is also written to the
        ``{sigma}sigma`` output directory.
    pop
        Accepted but not used by this function.
    save
        When equal to the string ``'save'``, the figure is written to the
        ``xmatch`` output directory.

    Notes
    -----
    The figure is always written to ``/home/andreia.dourado/mag_pop.png``,
    regardless of ``save`` and ``sigma``.
    NOTE(review): that unconditional write looks like a leftover -- confirm.
    """
    # One fixed colour per band, in plotting order.
    band_colors = {
        'u': 'blue',
        'g': 'green',
        'r': 'orange',
        'i': 'red',
        'z': 'purple',
        'y': 'gray',
    }
    plt.figure(figsize=(9,13))
    # 57 edges between 9 and 80 shared by every panel.
    edges = np.linspace(9, 80, 57)

    for slot, (band, color) in enumerate(band_colors.items(), start=1):
        plt.subplot(3, 2, slot)
        plt.hist(
            catalog[f'mag_{band}dp02_object'],
            histtype='stepfilled',
            bins=edges,
            label=f'{band} band',
            alpha=0.5,
            edgecolor="black",
            color=color,
        )
        plt.xlim(0,100)
        plt.yscale('log')
        plt.xlabel('mag',fontsize=13)
        plt.ylabel('counts',fontsize=13)
        plt.legend(loc=2)
        plt.grid(True)

    plt.suptitle(title)
    if save == 'save':
        plt.savefig('/home/andreia.dourado/TCC/dp02_truth_obj/xmatch/mag_hist.png')
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/{sigma}sigma/mag_hist_{sigma}sigma.png')
    plt.savefig('/home/andreia.dourado/mag_pop.png')
    plt.show()

In [None]:
def redshift_hist(catalog, sigma=0,pop='',save=0):
    """Plot the normalised histogram of the ``redshiftdp01_test_truth`` column.

    Uses 200 bin edges evenly spaced between 0 and 3. ``save == 'save'``
    writes the figure to the ``xmatch`` directory and ``sigma != 0`` writes it
    to the ``{sigma}sigma`` directory. ``pop`` is accepted but unused.
    """
    redshift_edges = np.linspace(0, 3, 200)
    plt.hist(catalog['redshiftdp01_test_truth'], bins=redshift_edges, density=True)

    if save == 'save':
        plt.savefig('/home/andreia.dourado/TCC/dp02_truth_obj/xmatch/redshift.png')
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/{sigma}sigma/redshift_{sigma}sigma.png')
    plt.show()

In [None]:
def mag_color(catalog, sigma=0,pop='',save=0):
    """Draw colour-magnitude hexbin panels for each pair of adjacent bands.

    For the consecutive pairs (u,g), (g,r), (r,i), (i,z), (z,y) the panel shows
    the first band's magnitude on x and the magnitude difference
    first - second on y, on a 3x2 grid (five panels used).

    ``save == 'save'`` writes the figure to the ``xmatch`` directory and
    ``sigma != 0`` writes it to the ``{sigma}sigma`` directory. ``pop`` is
    accepted but unused.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    plt.figure(figsize=(9,13))

    adjacent_pairs = zip(bands[:-1], bands[1:])
    for slot, (band, next_band) in enumerate(adjacent_pairs, start=1):
        plt.subplot(3, 2, slot)
        magnitude = catalog[f'mag_{band}dp02_object']
        colour = magnitude - catalog[f'mag_{next_band}dp02_object']
        # Third positional argument is C=None, i.e. plain point counts per cell.
        plt.hexbin(magnitude, colour, None, mincnt=1, cmap='Reds', gridsize=[400,200], bins='log')
        plt.xlabel(f"mag {band}", fontsize=13)
        plt.ylabel(f"{band}-{next_band}", fontsize=13)
        plt.xlim(16,32)
        plt.ylim(-2,5)
        plt.grid(True)
        plt.tight_layout()

    if save == 'save':
        plt.savefig('/home/andreia.dourado/TCC/dp02_truth_obj/xmatch/mag_cor.png')
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/{sigma}sigma/mag_cor_{sigma}sigma.png')
    plt.show()

In [None]:
def color_color(catalog, sigma=0,pop='',save=0):
    """Draw colour-colour hexbin panels for consecutive band triplets.

    For each triplet (a, b, c) taken from u, g, r, i, z, y the panel shows the
    colour a-b on the x axis and b-c on the y axis, with log-scaled counts.
    Four panels are drawn on a 3x2 grid.

    The axis labels are derived from the same band pair as the data on that
    axis, so label and data cannot get out of step.

    Parameters
    ----------
    catalog : column-indexable table (presumably a pandas DataFrame)
        Must provide ``mag_{band}dp02_object`` for u, g, r, i, z and y.
    sigma : int
        When different from 0, the figure is also written to the
        ``{sigma}sigma`` output directory.
    pop
        Accepted but not used by this function.
    save
        When equal to the string ``'save'``, the figure is written to the
        ``xmatch`` output directory.
    """
    bands = ['u', 'g', 'r', 'i', 'z','y']
    plt.figure(figsize=(12,12))

    triplets = zip(bands, bands[1:], bands[2:])
    for slot, (first, middle, last) in enumerate(triplets, start=1):
        plt.subplot(3, 2, slot)
        middle_mag = catalog[f'mag_{middle}dp02_object']
        x_colour = catalog[f'mag_{first}dp02_object'] - middle_mag
        y_colour = middle_mag - catalog[f'mag_{last}dp02_object']
        # Third positional argument is C=None, i.e. plain point counts per cell.
        plt.hexbin(x_colour, y_colour, None, mincnt=1, cmap='turbo', gridsize=[400,200], bins='log')
        # Labels follow the plotted data: x is first-middle, y is middle-last.
        plt.xlabel(f'{first}-{middle}', fontsize=13)
        plt.ylabel(f'{middle}-{last}', fontsize=13)
        plt.colorbar()

    if save == 'save':
        plt.savefig('/home/andreia.dourado/TCC/dp02_truth_obj/xmatch/cor_cor.png')
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/{sigma}sigma/cor_cor_{sigma}sigma.png')
    plt.show()

In [None]:
def color_color_red(catalog,xlim=[-5,5],ylim=[-5,5], sigma=0, pop='', save=0):
    """Draw colour-colour hexbin panels coloured by redshift.

    For each triplet (a, b, c) taken from u, g, r, i, z, y the panel shows the
    colour a-b on the x axis and b-c on the y axis. Each cell is coloured by
    the ``redshiftdp01_test_truth`` values falling in it, reduced with
    hexbin's default reduction. Four panels are drawn on a 3x2 grid.

    The axis labels are derived from the same band pair as the data on that
    axis, so label and data cannot get out of step.

    Parameters
    ----------
    catalog : column-indexable table (presumably a pandas DataFrame)
        Must provide ``mag_{band}dp02_object`` for all six bands and
        ``redshiftdp01_test_truth``.
    xlim, ylim
        Accepted for API compatibility; the axis limits are currently not
        applied, so the panels autoscale.
    sigma : int
        When different from 0, the figure is also written to the
        ``{sigma}sigma`` output directory.
    pop
        Accepted but not used by this function.
    save
        When equal to the string ``'save'``, the figure is written to the
        ``xmatch`` output directory.
    """
    bands = ['u', 'g', 'r', 'i', 'z','y']
    plt.figure(figsize=(12,12))

    triplets = zip(bands, bands[1:], bands[2:])
    for slot, (first, middle, last) in enumerate(triplets, start=1):
        plt.subplot(3, 2, slot)
        middle_mag = catalog[f'mag_{middle}dp02_object']
        x_colour = catalog[f'mag_{first}dp02_object'] - middle_mag
        y_colour = middle_mag - catalog[f'mag_{last}dp02_object']
        plt.hexbin(x_colour, y_colour, C=catalog['redshiftdp01_test_truth'], mincnt=1, cmap='turbo', gridsize=[400,200])
        # Labels follow the plotted data: x is first-middle, y is middle-last.
        plt.xlabel(f'{first}-{middle}', fontsize=13)
        plt.ylabel(f'{middle}-{last}', fontsize=13)
        plt.colorbar(label='redshift')

    if save == 'save':
        plt.savefig('/home/andreia.dourado/TCC/dp02_truth_obj/xmatch/cor_red.png')
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/{sigma}sigma/cor_red_{sigma}sigma.png')
    plt.show()

In [None]:
def spatial_distribution(catalog, sigma=0, save=0, pop=''):
    """Plot a 100x100-bin 2D histogram of the catalogue's sky coordinates.

    Reads ``coord_radp02_object`` (x axis) and ``coord_decdp02_object``
    (y axis). ``save == 'save'`` writes the figure to the ``xmatch`` directory
    and ``sigma != 0`` writes it to the ``{sigma}sigma`` directory. ``pop`` is
    accepted but unused. The function does not call ``plt.show()`` itself.
    """
    right_ascension = catalog['coord_radp02_object']
    declination = catalog['coord_decdp02_object']
    plt.hist2d(right_ascension, declination, bins=100)
    plt.xlabel('RA [deg]')
    plt.ylabel('Dec [deg]')
    plt.colorbar()

    if save == 'save':
        plt.savefig('/home/andreia.dourado/TCC/dp02_truth_obj/xmatch/area_ceu.png')
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/{sigma}sigma/area_ceu_{sigma}sigma.png')

#### 3.2 Plots

#### Distribuição espacial

In [None]:
# Sky (RA/Dec) density of the filtered catalogue; default arguments, so nothing is saved.
spatial_distribution(data)

#### Distribuição de redshifts 

In [None]:
# Redshift distribution of the filtered catalogue; default arguments, so nothing is saved.
redshift_hist(data)

#### Distribuição de magnitudes

In [None]:
# Per-band magnitude histograms of the filtered catalogue.
mag_histogram(data)

#### Erros das magnitudes

In [None]:
# Magnitude error versus magnitude (u, g, r, i) for the filtered catalogue.
errors(data)

In [None]:
# S/N versus magnitude in all six bands for the filtered catalogue.
plot_errors(data)

#### Cor-magnitude

In [None]:
# Colour-magnitude panels for the filtered catalogue.
mag_color(data)

#### Cor-cor

In [None]:
# Colour-colour panels (point counts) for the filtered catalogue.
color_color(data)

#### Cor-Cor com redshift

In [None]:
# Colour-colour panels coloured by redshift for the filtered catalogue.
color_color_red(data)

### 4. Cortes de magnitude - RomanRubin

#### 4.1 Cortes SNR

__Corte em SN>10__

In [None]:
# List the columns available before choosing the ones used for the cuts below.
data.columns

In [None]:
# S/N > 10 in the i band: a magnitude error err corresponds to
# S/N = 1 / (10**(0.4*err) - 1), so S/N > 10 is err < 2.5*log10(1 + 1/10).
# NOTE(review): this column name follows a different pattern from the
# `mag_{band}dp02_object` columns used by the filter and plots -- confirm it exists.
data_sn_10 = data.loc[data['mag_i_truth_dp02_err'].lt(2.5*np.log10(1+0.1))]

In [None]:
# Display the S/N > 10 selection.
data_sn_10

__Corte em SN>5__

In [None]:
# S/N > 5 in the i band: a magnitude error err corresponds to
# S/N = 1 / (10**(0.4*err) - 1), so S/N > 5 is err < 2.5*log10(1 + 1/5).
# NOTE(review): this column name follows a different pattern from the
# `mag_{band}dp02_object` columns used by the filter and plots -- confirm it exists.
data_sn_5 = data.loc[data['mag_i_truth_dp02_err'].lt(2.5*np.log10(1+0.2))]

In [None]:
# Display the S/N > 5 selection.
data_sn_5

__Histograma de magnitudes__

In [None]:
# Choose which S/N selection to histogram: 5 or 10.
sigma = 10

hist_color = '#4c72b0'
line_color = '#d62728'

plt.figure(figsize=(10, 6))

# Histogram the i-band magnitude of the chosen selection (no edgecolor is set)
# and mark the magnitude cut that is applied to it afterwards.
if sigma == 5:
    c = plt.hist(data_sn_5['mag_i_truth_dp02'], bins=300, log=True, color=hist_color, alpha=0.7)
    plt.axvline(25.25, color=line_color, linestyle=':', linewidth=1, label='$i = 25.25$')
elif sigma == 10:
    c = plt.hist(data_sn_10['mag_i_truth_dp02'], bins=300, log=True, color=hist_color, alpha=0.7)
    plt.axvline(24.5, color=line_color, linestyle=':', linewidth=1, label='$i = 24.50$')

# Labels, legend and grid.
plt.title(fr'i-band Histogram for SNR $> {sigma}\sigma$', fontsize=18, weight='bold', pad=20)
plt.xlabel(r'$i$-band Magnitude', fontsize=16)
plt.ylabel('Number of Galaxies (log scale)', fontsize=16)
plt.legend(loc='upper left', fontsize=14, frameon=True, shadow=True)
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# Write the figure to disk before showing it.
plt.tight_layout()
plt.savefig(f'/home/andreia.dourado/TCC/dp02_truth_obj/truth_obj/hist_i_{sigma}sigma.png', dpi=300)
plt.show()

#### 4.2 Cortes de magnitude

__5sigma: i < 25.25__

In [None]:
# Magnitude cut on the S/N > 5 selection: keep i-band magnitudes below 25.25.
data_5sigma = data_sn_5.loc[data_sn_5['mag_i_truth_dp02'].lt(25.25)]

In [None]:
# Display the final 5-sigma sample.
data_5sigma

__10sigma: i<24.5__

In [None]:
# Magnitude cut on the S/N > 10 selection: keep i-band magnitudes below 24.5.
data_10sigma = data_sn_10.loc[data_sn_10['mag_i_truth_dp02'].lt(24.5)]

__Salvando os conjuntos de dados em arquivos .csv:__

In [None]:
# Write the 5-sigma sample to CSV.
# NOTE(review): `index=False` is not passed, so the DataFrame index is written
# as an extra first column -- confirm downstream readers expect it.
data_5sigma.to_csv("/home/andreia.dourado/TCC/dp02_truth_obj/truth_obj/training_set_dp02_truth_errmodel_5sigma.csv")

In [None]:
# Write the 10-sigma sample to CSV.
# NOTE(review): `index=False` is not passed, so the DataFrame index is written
# as an extra first column -- confirm downstream readers expect it.
data_10sigma.to_csv("/home/andreia.dourado/TCC/dp02_truth_obj/truth_obj/training_set_dp02_truth_errmodel_10sigma.csv")

In [None]:
# Sky (RA/Dec) density of the 5-sigma sample.
spatial_distribution(data_5sigma)

In [None]:
# Sky (RA/Dec) density of the 10-sigma sample.
spatial_distribution(data_10sigma)

In [None]:
# Redshift distribution of the 5-sigma sample.
redshift_hist(data_5sigma)
#plt.savefig('redshift_10sigma_dp01.png')

In [None]:
# Redshift distribution of the 10-sigma sample.
redshift_hist(data_10sigma)

In [None]:
# Per-band magnitude histograms of the 5-sigma sample.
mag_histogram(data_5sigma)

In [None]:
# Per-band magnitude histograms of the 10-sigma sample.
mag_histogram(data_10sigma)

In [None]:
# Colour-colour panels (point counts) for the 10-sigma sample.
color_color(data_10sigma)
#plt.savefig('corcor_10sigma_dp01.png')

In [None]:
# S/N versus magnitude in all six bands for the 5-sigma sample.
plot_errors(data_5sigma)

In [None]:
# S/N versus magnitude in all six bands for the 10-sigma sample.
plot_errors(data_10sigma)
#plt.savefig('erros_10sigma_dp01.png')