_Authors: Andreia Dourado, Bruno Moraes_

_Adapted from Melissa Graham's example notebook._

__Description: This is a first notebook to create a training set of galaxies using a random sample from the Rubin Science Platform DP0.2 data__


In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#import tables_io
#import qp
#import sys 
from matplotlib import gridspec

### 1. Reading the data

In [None]:
# Load the input catalogue from CSV into a DataFrame.
# NOTE(review): judging by the file name this is the DP0.1 x DP0.2 cross-match;
# the absolute path is specific to one user's home directory.
dataset = pd.read_csv("/home/andreia.dourado/xmatching_dp01_dp02.csv")

In [None]:
# Notebook echo: render the freshly loaded table for inspection.
dataset

Aplicando a flag detect_isPrimary:

In [None]:
# Keep only the rows whose detect_isPrimary flag is True. The explicit copy
# makes `data` an independent DataFrame, so the magnitude columns added to it
# further down do not trigger pandas' SettingWithCopyWarning.
data = dataset[dataset['detect_isPrimarydp02_object'] == True].copy()

In [None]:
# Notebook echo: render the table left after the detect_isPrimary filter.
data

In [None]:
# Notebook echo: list the column names available in the filtered table.
data.columns

### Conversões flux to mag

Convertendo os fluxos para magnitudes:

In [None]:
bands = ['u', 'g', 'r', 'i', 'z', 'y']

# Convert each band's cModel flux to a magnitude: m = -2.5 * log10(flux) + 31.4.
# NOTE(review): 31.4 is the AB zero point for fluxes in nanojansky - confirm
# that this is the unit of the *_cModelFlux columns.
# `assign` returns a new DataFrame instead of writing columns onto what may be
# a filtered slice of `dataset`, which avoids pandas' SettingWithCopyWarning.
data = data.assign(**{
    f'mag_{filt}': -2.5 * np.log10(data[f'{filt}_cModelFluxdp02_object']) + 31.4
    for filt in bands
})

In [None]:
# Notebook echo: render the table, now including the mag_<band> columns.
data

Convertendo os erros de fluxo para erros de magnitudes:

In [None]:
bands = ['u', 'g', 'r', 'i', 'z', 'y']

# First-order error propagation of m = -2.5 * log10(flux):
#   magerr = (2.5 / ln 10) * flux_err / flux
# `assign` returns a new DataFrame instead of writing columns onto what may be
# a filtered slice of `dataset`, which avoids pandas' SettingWithCopyWarning.
data = data.assign(**{
    f'magerr_{filt}': (2.5 / np.log(10)) * (
        data[f'{filt}_cModelFluxErrdp02_object'] / data[f'{filt}_cModelFluxdp02_object']
    )
    for filt in bands
})

In [None]:
# Notebook echo: render the table, now including the magerr_<band> columns.
data

### 2. Analysing the training set

#### 2.1 Functions

In [None]:
def plot_errors(catalog, title='Errors', gridsize=(400, 200), bins='log', cmap='inferno', xlim=(20, 30), ylim=(0, 100), sigma=0, pop='n'):
    """Plot signal-to-noise against magnitude, one hexbin panel per band.

    The signal-to-noise ratio is derived from the magnitude error as
    ``1 / (10 ** (0.4 * magerr) - 1)``. A dashed horizontal line marks
    S/N = 5 in every panel. The figure is always shown; it is also written
    to disk when ``sigma`` is non-zero.

    Parameters
    ----------
    catalog
        Object indexable by column name (e.g. a DataFrame) that provides
        ``mag_<band>`` and ``magerr_<band>`` for the bands u, g, r, i, z, y.
    title : str
        Figure title.
    gridsize : int or (int, int)
        Passed to ``Axes.hexbin``.
    bins, cmap
        Passed to ``Axes.hexbin``.
    xlim, ylim : pair of float
        Axis limits applied to every panel.
    sigma : int
        When different from 0, the figure is saved under the
        ``{sigma}sigma/{pop}_populacao`` output folder.
    pop : str
        Population tag used in the output folder and file name.
    """
    # Defaults are tuples rather than lists so that no mutable object is
    # shared between calls; lists passed by callers are still accepted.
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    fig, axes = plt.subplots(3, 2, figsize=[12, 16])
    fig.suptitle(title, fontsize=16)
    for ax, band in zip(axes.flatten(), bands):
        mag = np.array(catalog[f'mag_{band}'])
        err = np.array(catalog[f'magerr_{band}'])
        # Invert magerr = 2.5 * log10(1 + 1/SN) to recover the S/N.
        sn = 1 / (10 ** (0.4 * err) - 1)
        ax.hexbin(mag, sn, gridsize=gridsize, cmap=cmap, bins=bins, mincnt=1)
        ax.set_ylabel("S/N", fontsize=14)
        ax.set_xlabel(f"mag {band}", fontsize=14)
        ax.set_ylim(ylim)
        ax.set_xlim(xlim)
        ax.axhline(5, color='black', label='5σ', linestyle='--')
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend(fontsize=12)
    # Leave the top 3% of the figure free for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/rubin_roman_steps/{sigma}sigma/{pop}_populacao/erroslog_{sigma}sigma_{pop}populacao.png', format='png')
    plt.show()

In [None]:
def errors(catalog, sigma=0, pop=''):
    """Scatter-plot magnitude error against magnitude for the u, g, r, i bands.

    The panels occupy the first four slots of a 1x5 grid and the y axis is
    limited to the range 0-2. The figure is written to disk when ``sigma`` is
    non-zero; this function does not call ``plt.show()`` itself.

    Parameters
    ----------
    catalog
        Object indexable by column name (e.g. a DataFrame) that provides
        ``mag_<band>`` and ``magerr_<band>`` for u, g, r, i.
    sigma : int
        When different from 0, the figure is saved under the
        ``{sigma}sigma/{pop}_populacao`` output folder.
    pop : str
        Population tag used in the output folder and file name.
    """
    bands = ['u', 'g', 'r', 'i']
    plt.figure(figsize=[18, 4])
    for panel, band in enumerate(bands, start=1):
        # Same layout as the three-digit codes 151..154: one row, five columns.
        plt.subplot(1, 5, panel)
        plt.plot(catalog[f'mag_{band}'],
                 catalog[f'magerr_{band}'],
                 '.', alpha=0.3, color='steelblue')
        plt.xlabel(f'mag {band}')
        if panel == 1:
            # Only the left-most panel carries the shared y label.
            plt.ylabel('error')
        # plt.xlim(16, 28)  # optional fixed magnitude range, left disabled
        plt.ylim(0, 2)
    # Lay the panels out once, after all of them exist.
    plt.tight_layout()
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/rubin_roman_steps/{sigma}sigma/{pop}_populacao/erros_{sigma}sigma_{pop}populacao.png', format='png')


In [None]:
def mag_histogram(catalog, title='DP0.2', sigma=0,pop=''):
    """Draw a log-scaled magnitude histogram for each of the six bands.

    Panels are arranged on a 3x2 grid, one colour per band, sharing the same
    56 bins between magnitude 9 and 80. The figure is always shown and is
    also saved when ``sigma`` is non-zero.

    Parameters
    ----------
    catalog
        Object indexable by column name that provides ``mag_<band>`` for
        u, g, r, i, z, y.
    title : str
        Figure title.
    sigma : int
        When different from 0, the figure is saved under the
        ``{sigma}sigma/{pop}_populacao`` output folder.
    pop : str
        Population tag used in the output folder and file name.
    """
    band_colors = {
        'u': 'blue',
        'g': 'green',
        'r': 'orange',
        'i': 'red',
        'z': 'purple',
        'y': 'gray',
    }
    edges = np.linspace(9, 80, 57)
    plt.figure(figsize=(9,13))
    for panel, (band, color) in enumerate(band_colors.items(), start=1):
        plt.subplot(3, 2, panel)
        plt.hist(
            catalog[f'mag_{band}'],
            bins=edges,
            histtype='stepfilled',
            alpha=0.5,
            color=color,
            edgecolor="black",
            label=f'{band} band',
        )
        plt.xlim(0, 100)
        plt.yscale('log')
        plt.xlabel('mag', fontsize=13)
        plt.ylabel('counts', fontsize=13)
        plt.legend(loc=2)
        plt.grid(True)
    plt.suptitle(title)
    save_requested = sigma != 0
    if save_requested:
        out_path = f'/home/andreia.dourado/TCC/rubin_roman_steps/{sigma}sigma/{pop}_populacao/mag_hist_{sigma}sigma_{pop}populacao.png'
        plt.savefig(out_path, format='png')
    plt.show()

In [None]:
def redshift_hist(catalog, sigma=0,pop=''):
    """Show the normalised histogram of the ``redshiftdp01_test_truth`` column.

    The histogram uses 199 equal-width bins between 0 and 3 and is drawn as a
    density. The figure is also saved when ``sigma`` is non-zero, under the
    ``{sigma}sigma/{pop}_populacao`` output folder.
    """
    z_edges = np.linspace(0, 3, 200)
    redshifts = catalog['redshiftdp01_test_truth']
    plt.hist(redshifts, bins=z_edges, density=True)
    save_requested = sigma != 0
    if save_requested:
        out_path = f'/home/andreia.dourado/TCC/rubin_roman_steps/{sigma}sigma/{pop}_populacao/redshift_{sigma}sigma_{pop}populacao.png'
        plt.savefig(out_path, format='png')
    plt.show()

In [None]:
def mag_color(catalog, sigma=0,pop=''):
    """Plot colour against magnitude for each pair of adjacent bands.

    For the pairs (u, g), (g, r), (r, i), (i, z) and (z, y) a log-scaled
    hexbin of ``mag_<band> - mag_<next band>`` versus ``mag_<band>`` is drawn
    on a 3x2 grid. The figure is always shown and is also saved when
    ``sigma`` is non-zero.

    Parameters
    ----------
    catalog
        Object indexable by column name that provides ``mag_<band>`` for
        u, g, r, i, z, y.
    sigma : int
        When different from 0, the figure is saved under the
        ``{sigma}sigma/{pop}_populacao`` output folder.
    pop : str
        Population tag used in the output folder and file name.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    plt.figure(figsize=(9, 13))
    for panel, (band, next_band) in enumerate(zip(bands, bands[1:]), start=1):
        plt.subplot(3, 2, panel)
        magnitude = catalog[f'mag_{band}']
        colour = magnitude - catalog[f'mag_{next_band}']
        plt.hexbin(magnitude, colour, mincnt=1, cmap='Reds', gridsize=[400, 200], bins='log')
        plt.xlabel("mag " + band, fontsize=13)
        plt.ylabel(f"{band}-{next_band}", fontsize=13)
        plt.xlim(16, 32)
        plt.ylim(-2, 5)
        plt.grid(True)
    # Lay the panels out once, after all of them exist.
    plt.tight_layout()
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/rubin_roman_steps/{sigma}sigma/{pop}_populacao/mag_cor_{sigma}sigma_{pop}populacao.png', format='png')
    plt.show()

In [None]:
def color_color(catalog, sigma=0,pop=''):
    """Plot colour-colour diagrams for consecutive band triplets.

    For each triplet (a, b, c) of adjacent bands, a log-scaled hexbin is drawn
    with ``mag_a - mag_b`` on the x axis and ``mag_b - mag_c`` on the y axis.
    The four panels use a 3x2 grid. The figure is always shown and is also
    saved when ``sigma`` is non-zero.

    Parameters
    ----------
    catalog
        Object indexable by column name that provides ``mag_<band>`` for
        u, g, r, i, z, y.
    sigma : int
        When different from 0, the figure is saved under the
        ``{sigma}sigma/{pop}_populacao`` output folder.
    pop : str
        Population tag used in the output folder and file name.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    plt.figure(figsize=(12, 12))
    triplets = zip(bands, bands[1:], bands[2:])
    for panel, (first, middle, last) in enumerate(triplets, start=1):
        plt.subplot(3, 2, panel)
        x_colour = catalog[f'mag_{first}'] - catalog[f'mag_{middle}']
        y_colour = catalog[f'mag_{middle}'] - catalog[f'mag_{last}']
        plt.hexbin(x_colour, y_colour, mincnt=1, cmap='turbo', gridsize=[400, 200], bins='log')
        # Each label names the colour actually plotted on that axis; the
        # two labels were previously exchanged.
        plt.xlabel(f'{first}-{middle}', fontsize=13)
        plt.ylabel(f'{middle}-{last}', fontsize=13)
        plt.colorbar()
        #plt.xlim(-5,5)
        #plt.ylim(-5,5)
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/rubin_roman_steps/{sigma}sigma/{pop}_populacao/cor_cor_{sigma}sigma_{pop}populacao.png', format='png')
    plt.show()

In [None]:
def color_color_red(catalog, xlim=(-5, 5), ylim=(-5, 5), sigma=0, pop='', redshift_col='redshift'):
    """Plot colour-colour diagrams with hexagons coloured by redshift.

    For each triplet (a, b, c) of adjacent bands, a hexbin is drawn with
    ``mag_a - mag_b`` on the x axis and ``mag_b - mag_c`` on the y axis, using
    the ``redshift_col`` column as the ``C`` values (hexbin's default
    reduction is the mean per hexagon). The figure is always shown and is
    also saved when ``sigma`` is non-zero.

    Parameters
    ----------
    catalog
        Object indexable by column name that provides ``mag_<band>`` for
        u, g, r, i, z, y and the redshift column.
    xlim, ylim : pair of float
        Axis limits applied to every panel.
    sigma : int
        When different from 0, the figure is saved under the
        ``{sigma}sigma/{pop}_populacao`` output folder.
    pop : str
        Population tag used in the output folder and file name.
    redshift_col : str
        Name of the redshift column. Defaults to ``'redshift'`` for backward
        compatibility; ``redshift_hist`` in this notebook reads
        ``'redshiftdp01_test_truth'`` instead, so pass that name for
        catalogues that only carry the suffixed column.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    redshift = catalog[redshift_col]
    plt.figure(figsize=(12, 12))
    triplets = zip(bands, bands[1:], bands[2:])
    for panel, (first, middle, last) in enumerate(triplets, start=1):
        plt.subplot(3, 2, panel)
        x_colour = catalog[f'mag_{first}'] - catalog[f'mag_{middle}']
        y_colour = catalog[f'mag_{middle}'] - catalog[f'mag_{last}']
        plt.hexbin(x_colour, y_colour, C=redshift, mincnt=1, cmap='turbo', gridsize=[400, 200])
        # Each label names the colour actually plotted on that axis; the
        # two labels were previously exchanged.
        plt.xlabel(f'{first}-{middle}', fontsize=13)
        plt.ylabel(f'{middle}-{last}', fontsize=13)
        plt.colorbar(label='redshift')
        plt.xlim(xlim[0], xlim[1])
        plt.ylim(ylim[0], ylim[1])
    if sigma != 0:
        plt.savefig(f'/home/andreia.dourado/TCC/rubin_roman_steps/{sigma}sigma/{pop}_populacao/cor_cor_red_{sigma}sigma_{pop}populacao.png', format='png')
    plt.show()

In [None]:
def spatial_distribution(catalog, sigma=0, pop=''):
    """Draw a 100x100-bin 2-D histogram of the catalogue's sky coordinates.

    Reads ``coord_radp02_object`` (x axis) and ``coord_decdp02_object``
    (y axis) from ``catalog`` and adds a colour bar. The figure is saved when
    ``sigma`` is non-zero, under the ``{sigma}sigma/{pop}_populacao`` output
    folder; this function does not call ``plt.show()`` itself.
    """
    ra = catalog['coord_radp02_object']
    dec = catalog['coord_decdp02_object']
    plt.hist2d(ra, dec, bins=100)
    plt.xlabel('RA [deg]')
    plt.ylabel('Dec [deg]')
    plt.colorbar()
    save_requested = sigma != 0
    if save_requested:
        out_path = f'/home/andreia.dourado/TCC/rubin_roman_steps/{sigma}sigma/{pop}_populacao/sky_{sigma}sigma_{pop}populacao.png'
        plt.savefig(out_path, format='png')

#### 2.2 Plots

Spatial distribution

In [None]:
# Sky map of the primary-detection sample. Because sigma is non-zero the
# figure is also written to the 5sigma/com_populacao output folder.
spatial_distribution(data, sigma=5, pop='com')

Redshift distribution

In [None]:
# Redshift histogram of the primary-detection sample, also saved to the
# 5sigma/com_populacao output folder.
redshift_hist(data,sigma=5, pop='com')

Errors

In [None]:
# Magnitude-error scatter plots (u, g, r, i) for the primary-detection sample,
# also saved to the 5sigma/com_populacao output folder.
errors(data,sigma=5, pop='com')

In [None]:
# S/N versus magnitude for all six bands. sigma keeps its default of 0, so the
# figure is only displayed and not written to disk.
plot_errors(data)

Magnitude distribution

In [None]:
# Per-band magnitude histograms of the primary-detection sample, also saved to
# the 5sigma/com_populacao output folder.
mag_histogram(data,sigma=5, pop='com')

Mag-color

In [None]:
# Colour-magnitude diagrams of the primary-detection sample, also saved to the
# 5sigma/com_populacao output folder.
mag_color(data,sigma=5, pop='com')

Color-color

In [None]:
# Colour-colour diagrams of the primary-detection sample, also saved to the
# 5sigma/com_populacao output folder.
color_color(data,sigma=5, pop='com')

### Cortes de magnitude - RomanRubin

#### 1. Corte em SN>10

In [None]:
# Keep rows whose i-band magnitude error is below 2.5 * log10(1 + 1/10),
# the error level that corresponds to S/N = 10 in this notebook.
data_sn_10 = data.loc[data['magerr_i'].lt(2.5 * np.log10(1 + 0.1))]

In [None]:
# Notebook echo: render the S/N > 10 selection.
data_sn_10

#### 2. Corte em SN>5

In [None]:
# Keep rows whose i-band magnitude error is below 2.5 * log10(1 + 1/5),
# the error level that corresponds to S/N = 5 in this notebook.
data_sn_5 = data.loc[data['magerr_i'].lt(2.5 * np.log10(1 + 0.2))]

In [None]:
# Notebook echo: render the S/N > 5 selection.
data_sn_5

#### Histograma de magnitudes

In [None]:
# S/N threshold whose sample is plotted; supported values are 5 and 10.
sigma = 10

# Pick the S/N-selected sample and the i-band magnitude at which the
# reference line is drawn. Only the branch that is taken touches its sample.
if sigma == 5:
    sample, mag_limit = data_sn_5, 25.6
elif sigma == 10:
    sample, mag_limit = data_sn_10, 24.7
else:
    # Fail loudly instead of silently saving an empty figure.
    raise ValueError(f'unsupported sigma: {sigma!r} (expected 5 or 10)')

hist_color = '#4c72b0'
line_color = '#d62728'

plt.figure(figsize=(10, 6))

plt.hist(sample['mag_i'], bins=300, log=True, color=hist_color, alpha=0.7)
plt.axvline(mag_limit, color=line_color, ls=':', linewidth=1, label=f'$i = {mag_limit}$')

plt.title(fr'i-band Histogram for SNR $> {sigma}\sigma$', fontsize=18, weight='bold', pad=20)
plt.xlabel(r'$i$-band Magnitude', fontsize=16)
plt.ylabel('Number of Galaxies (log scale)', fontsize=16)
plt.legend(loc='upper left', fontsize=14, frameon=True, shadow=True)

plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.tight_layout()
plt.savefig(f'/home/andreia.dourado/TCC/rubin_roman_steps/hist_i_{sigma}sigma.png', dpi=300)

plt.show()

#### 3. Cortes de magnitude

##### 5Sigma: i < 25.6

In [None]:
# Apply the i < 25.6 magnitude cut to the S/N > 5 selection.
data_5sigma = data_sn_5.loc[data_sn_5['mag_i'].lt(25.6)]

In [None]:
# Notebook echo: render the 5-sigma sample after the i < 25.6 cut.
data_5sigma

##### 10sigma: i<24.7

In [None]:
# Apply the i < 24.7 magnitude cut to the S/N > 10 selection.
data_10sigma = data_sn_10.loc[data_sn_10['mag_i'].lt(24.7)]

In [None]:
# Notebook echo: render the 10-sigma sample after the i < 24.7 cut.
data_10sigma

#### 4. QA dos samples com cortes

In [None]:
# Sky map of the 10-sigma sample. The name used here before, `data_5sigma_cut`,
# is never defined in this notebook and did not match sigma=10.
# NOTE(review): pop keeps its default '', so the file is written to a folder
# named `_populacao` - confirm that this is intended.
spatial_distribution(data_10sigma, sigma=10)

In [None]:
# Redshift histogram of the 10-sigma sample. `data_10sigma_cut` does not exist
# yet at this point of the notebook, so the sample with the magnitude cut
# applied (`data_10sigma`) is used.
redshift_hist(data_10sigma, sigma=10)

In [None]:
# Magnitude histograms of the 10-sigma sample. The raw `dataset` table has no
# mag_<band> columns (they are only added to `data`), so it cannot be plotted
# here.
mag_histogram(data_10sigma, sigma=10)

In [None]:
# Colour-colour diagrams of the 10-sigma sample.
# NOTE(review): pop keeps its default '', so the file is written to a folder
# named `_populacao` - confirm that this is intended.
color_color(data_10sigma,sigma=10)

#### Retirando a população

In [None]:
# Drop the population of objects with y-band magnitude of 60 or more.
# `test` was previously filtered from itself without ever being created.
# NOTE(review): it is built from the 5-sigma sample because the result is
# later saved as `5sigma_sem_pop.csv` - confirm this is the intended input.
test = data_5sigma[data_5sigma['mag_y'] < 60]

In [None]:
# Notebook echo: render the sample left after the mag_y < 60 cut.
test

In [None]:
# Magnitude histograms of `test`. sigma keeps its default of 0, so the figure
# is only displayed and not written to disk.
mag_histogram(test)

In [None]:
# Keep only the rows of the 10-sigma sample whose magnitude is below 60 in
# every band. `data_10sigma_cut` was previously filtered from itself without
# ever being created; it is now derived from `data_10sigma`.
mag_columns = [f'mag_{band}' for band in bands]
data_10sigma_cut = data_10sigma[(data_10sigma[mag_columns] < 60).all(axis=1)]

In [None]:
# Notebook echo: render the 10-sigma sample after the all-band mag < 60 cut.
data_10sigma_cut

In [None]:
# Magnitude histograms of the cleaned 10-sigma sample. sigma keeps its default
# of 0, so the figure is only displayed and not written to disk.
mag_histogram(data_10sigma_cut)

In [None]:
# Write `test` to CSV.
# NOTE(review): to_csv writes the DataFrame index as an extra first column by
# default; pass index=False if that column is not wanted downstream.
test.to_csv('/home/andreia.dourado/data/5sigma_sem_pop.csv')