_Authors: Andreia Dourado, Bruno Moraes_

_Adapted from Melissa Graham's example notebook._

__Description: This is a first notebook to create a training set of galaxies using a random sample from the Rubin Science Platform DP0.2 data__


In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tables_io
import qp
import sys 
from matplotlib import gridspec
import dustmaps
from dustmaps.sfd import SFDQuery
from astropy.coordinates import SkyCoord
from dustmaps.config import config
# Tell dustmaps which directory to use for its map files (used by the SFD fetch/query cells below).
# NOTE(review): absolute, user-specific path; it must be adjusted when running under another account.
config['data_dir'] = '/home/andreia.dourado/ic-photoz/andreia_dourado/'

### 1. Reading the data

In [None]:
# Load the training-set catalog from CSV.
# NOTE(review): `data_redden` is not referenced again in the visible notebook, and the
# same file is read into `data` in the following cell.
data_redden=pd.read_csv("/home/andreia.dourado/ic-photoz/andreia_dourado/training_set.csv")

In [None]:
# Working catalog; the extinction-correction cells below add columns to this frame in place.
data = pd.read_csv("/home/andreia.dourado/ic-photoz/andreia_dourado/training_set.csv")

In [None]:
# Display the catalog as loaded, before any extinction correction is applied.
data

In [None]:
# Photometric error versus magnitude for the catalog as loaded (before dereddening).
# `data` is used here because `data_training_set` is only created later, in the
# "Saving the file" section; referencing it at this point fails on a fresh kernel.
bands = ['u','g', 'r', 'i']
plt.figure(figsize=[18,4])
for i, band in enumerate(bands):
    # Keep rows whose magnitude is not the 99. value (presumably a non-detection
    # placeholder; confirm) and whose error is below 2 mag.
    good = data.query(f'mag_{band} != 99. & magerr_{band} < 2.')
    plt.subplot(1, 5, i + 1)
    plt.plot(good[f'mag_{band}'], good[f'magerr_{band}'],
             '.', alpha=0.3, color='steelblue')
    plt.xlabel(f'mag {band}')
    if i == 0:
        plt.ylabel('error')
    plt.xlim(16, 28)
    plt.ylim(0, 2)
# Adjust the layout once, after all panels exist.
plt.tight_layout()

### 2. Correcting extinction
Sam Schmidt's example notebook: https://github.com/LSSTDESC/DC2-analysis/blob/master/contributed/DC2_object_deredden.ipynb

Jeff Carlin, delegate-contributions-dp02: https://github.com/LSSTDESC/DC2-analysis/blob/master/contributed/DC2_object_deredden.ipynb

In [None]:
# A_lambda / E(B-V) coefficients for the six LSST filters, ordered u, g, r, i, z, y
# (the order in which the dereddening loop below indexes this array).
band_a_ebv = np.array([4.81,3.64,2.70,2.06,1.58,1.31])

In [None]:
# Sky positions of every catalog row, built from the 'ra'/'dec' columns read as degrees in the ICRS frame.
# NOTE(review): the extra alias `c` is not used anywhere else in the visible notebook.
coords = c = SkyCoord(data['ra'], data['dec'], unit = 'deg',frame='icrs')

In [None]:
# Fetch the SFD dust map files through dustmaps (presumably into the configured data_dir; confirm).
# NOTE(review): dustmaps.sfd is already loaded by `from dustmaps.sfd import SFDQuery` in the
# import cell, so this import is redundant.
import dustmaps.sfd
dustmaps.sfd.fetch()

In [None]:
# Query the SFD map at every object's position and store the result as the 'ebv' column.
sfd = SFDQuery()
ebvvec = sfd(coords)
# 'ebv' is multiplied by the per-band A_lambda/E(B-V) coefficients in the next cell.
data['ebv'] = ebvvec

In [None]:
# Deredden every band: mag_dered = mag - E(B-V) * (A_band / E(B-V)).
# NOTE(review): the error-plot filters test `mag != 99.`; if 99. is a placeholder
# value in this catalog, it is shifted here as well and no longer equals 99. (confirm).
for band, a_over_ebv in zip(['u','g','r','i','z','y'], band_a_ebv):
    extinction = data['ebv'] * a_over_ebv
    data[f'mag_{band}_dered'] = data[f'mag_{band}'] - extinction

In [None]:
# Normalised histogram of the i-band extinction, E(B-V) * band_a_ebv[3]
# (index 3 is the i band in the u, g, r, i, z, y ordering), using 200 bin edges from 0 to 3.
plt.hist(data['ebv']*band_a_ebv[3], bins=np.linspace(0,3,200),density=True)
plt.show()

In [None]:
# Colour-colour diagram (g-r vs r-i) before and after the extinction correction.
# The catalog is a galaxy training set, so the legend refers to galaxies rather than stars.
fig = plt.figure(figsize=(10,10))
plt.scatter(data['mag_g']-data['mag_r'], data['mag_r']-data['mag_i'],
            s=10, c='r', label="galaxies before dereddening")
plt.scatter(data['mag_g_dered']-data['mag_r_dered'], data['mag_r_dered']-data['mag_i_dered'],
            s=10, c='dodgerblue', label='galaxies after dereddening')
# Axis limits are set once; both scatter layers share them.
plt.xlim(-5,5)
plt.ylim(-5,5)
plt.xlabel("g-r",fontsize=18)
plt.ylabel("r-i",fontsize=18)
plt.legend(loc='lower right',fontsize=16);

In [None]:
# Colour-colour diagram (g-r vs i-z) before and after the extinction correction.
# The catalog is a galaxy training set, so the legend refers to galaxies rather than stars.
fig = plt.figure(figsize=(10,10))
plt.scatter(data['mag_g']-data['mag_r'], data['mag_i']-data['mag_z'],
            s=10, c='r', label="galaxies before dereddening")
plt.scatter(data['mag_g_dered']-data['mag_r_dered'], data['mag_i_dered']-data['mag_z_dered'],
            s=10, c='dodgerblue', label='galaxies after dereddening')
plt.xlim(-5.,5.)
plt.ylim(-5.,5.)
plt.xlabel("g-r",fontsize=18)
plt.ylabel("i-z",fontsize=18)
plt.legend(loc='lower right',fontsize=16);

### 3. Saving the file

In [None]:
# Keep only the columns needed for the training set: identifier, sky position,
# redshift, the dereddened magnitudes and the magnitude errors (errors are left as loaded).
lsst_bands = ['u', 'g', 'r', 'i', 'z', 'y']
training_columns = (
    ['objectId', 'ra', 'dec', 'redshift']
    + [f'mag_{b}_dered' for b in lsst_bands]
    + [f'magerr_{b}' for b in lsst_bands]
)
data_training_set = data[training_columns]

In [None]:
# Display the selected columns; the magnitude columns still carry the `_dered` suffix here.
data_training_set

In [None]:
# Rename the dereddened magnitude columns back to plain `mag_<band>`, the names the
# plotting helpers further down look up.
columns_map = {
    f'mag_{band}_dered': f'mag_{band}'
    for band in ['u', 'g', 'r', 'i', 'y', 'z']
}
data_training_set = data_training_set.rename(columns=columns_map)


In [None]:
# Display the training set after the rename; `mag_<band>` now holds the dereddened magnitudes.
data_training_set

In [None]:
# Write the training set to CSV in the current working directory.
# NOTE(review): the file name is spelled 'traning' (not 'training'); confirm before renaming,
# since other notebooks may read this exact name. With the default `index=True`, the
# DataFrame index is written as an extra first column.
data_training_set.to_csv('data_traning_set.csv')

### 2. Analysing the training set

#### 2.1 Functions

In [None]:
def plot_errors(catalog, bands=('u', 'g', 'r', 'i')):
    """Plot photometric error against magnitude, one panel per band.

    Parameters
    ----------
    catalog : pandas.DataFrame
        Must provide `mag_<band>` and `magerr_<band>` columns for every
        requested band (it is filtered with `DataFrame.query`).
    bands : sequence of str, optional
        Bands to plot, one panel each, laid out on a single row.
    """
    plt.figure(figsize=[18,4])
    for i, band in enumerate(bands):
        # Keep rows whose magnitude is not the 99. value (presumably a
        # non-detection placeholder; confirm) and whose error is below 2 mag.
        good = catalog.query(f'mag_{band} != 99. & magerr_{band} < 2.')
        plt.subplot(1, len(bands), i + 1)
        plt.plot(good[f'mag_{band}'], good[f'magerr_{band}'],
                 '.', alpha=0.3, color='steelblue')
        plt.xlabel(f'mag {band}')
        if i == 0:
            plt.ylabel('error')
        plt.xlim(16, 28)
        plt.ylim(0, 2)
    plt.tight_layout()
   

In [None]:
def mag_histogram(catalog, title='DP0.2'):
    """Draw one log-scaled magnitude histogram per band on a 3x2 grid.

    Parameters
    ----------
    catalog : mapping or DataFrame
        Indexed with `mag_u`, `mag_g`, `mag_r`, `mag_i`, `mag_z` and `mag_y`.
    title : str, optional
        Figure-level title.

    The figure is saved as 'mag_histogram_training_set.png' in the current
    working directory and then shown.
    """
    band_colors = {
        'u': 'blue',
        'g': 'green',
        'r': 'orange',
        'i': 'red',
        'z': 'purple',
        'y': 'gray',
    }
    plt.figure(figsize=(9,13))
    # 57 edges between 9 and 37 give 0.5 mag wide bins.
    mag_bins = np.linspace(9, 37, 57)
    for panel, (band, color) in enumerate(band_colors.items(), start=1):
        plt.subplot(3,2,panel)
        plt.hist(
            catalog[f'mag_{band}'],
            histtype='stepfilled',
            bins=mag_bins,
            label=f'{band} band',
            alpha = 0.5,
            edgecolor = "black",
            color = color,
        )
        plt.yscale('log')
        plt.xlabel('mag',fontsize=13)
        plt.ylabel('counts',fontsize=13)
        plt.legend(loc=2)
        plt.grid(True)
    plt.suptitle(title)
    plt.savefig('mag_histogram_training_set.png', format='png')
    plt.show()

In [None]:
def redshift_hist(catalog):
    """Plot the normalised distribution of `catalog['redshift']`.

    Uses 200 bin edges between 0 and 3, saves the figure as
    'redshift_training_set.png' in the current working directory, then shows it.
    """
    z_bins = np.linspace(0, 3, 200)
    redshifts = catalog['redshift']
    plt.hist(redshifts, bins=z_bins, density=True)
    plt.savefig('redshift_training_set.png', format='png')
    plt.show()

In [None]:
def mag_color(catalog):
    """Plot magnitude versus adjacent-band colour as log-scaled density maps.

    One panel is drawn for each consecutive band pair (u-g, g-r, r-i, i-z,
    z-y) on a 3x2 grid, with the bluer band's magnitude on the x axis and the
    colour on the y axis.

    Parameters
    ----------
    catalog : mapping or DataFrame
        Indexed with `mag_<band>` for the six bands; the values must support
        element-wise subtraction.

    The finished figure is saved once as 'magColor_training_set.png' in the
    current working directory and then shown.
    """
    bands = ['u', 'g', 'r', 'i', 'z','y']
    plt.figure(figsize=(9,13))
    for panel, (band, redder_band) in enumerate(zip(bands, bands[1:]), start=1):
        plt.subplot(3,2,panel)
        mag = catalog[f'mag_{band}']
        colour = mag - catalog[f'mag_{redder_band}']
        # mincnt=1 leaves empty hexagons blank; bins='log' applies a log colour scale to the counts.
        plt.hexbin(mag, colour, mincnt=1, cmap='Reds', gridsize=[400,200], bins='log')
        plt.xlabel("mag "+band,fontsize=13)
        plt.ylabel(f"{band}-{redder_band}",fontsize=13)
        plt.xlim(16,32)
        plt.ylim(-2,5)
        plt.grid(True)
    # Lay out and write the file once, after every panel has been drawn,
    # instead of re-saving the PNG on each loop iteration.
    plt.tight_layout()
    plt.savefig('magColor_training_set.png', format='png')
    plt.show()

In [None]:
def color_color(catalog):
    """Plot colour-colour density maps for consecutive band triplets.

    For each triplet (b0, b1, b2) of consecutive bands in u, g, r, i, z, y the
    panel shows b0-b1 on the x axis against b1-b2 on the y axis, so four panels
    are drawn on a 3x2 grid.

    Parameters
    ----------
    catalog : mapping or DataFrame
        Indexed with `mag_<band>` for the six bands; the values must support
        element-wise subtraction.

    The finished figure is saved once as 'colorColor_training_set.png' in the
    current working directory and then shown.
    """
    bands = ['u', 'g', 'r', 'i', 'z','y']
    plt.figure(figsize=(12,12))
    triplets = zip(bands, bands[1:], bands[2:])
    for panel, (bluer, middle, redder) in enumerate(triplets, start=1):
        plt.subplot(3,2,panel)
        x_colour = catalog[f'mag_{bluer}'] - catalog[f'mag_{middle}']
        y_colour = catalog[f'mag_{middle}'] - catalog[f'mag_{redder}']
        # mincnt=1 leaves empty hexagons blank; bins='log' applies a log colour scale to the counts.
        plt.hexbin(x_colour, y_colour, mincnt=1, cmap='turbo', gridsize=[400,200], bins='log')
        # Labels follow the plotted data: the bluer colour is on x, the redder on y.
        plt.xlabel(f'{bluer}-{middle}',fontsize=13)
        plt.ylabel(f'{middle}-{redder}',fontsize=13)
        plt.colorbar()
        plt.xlim(-5,5)
        plt.ylim(-5,5)
    # Write the file once, after every panel has been drawn.
    plt.savefig('colorColor_training_set.png', format='png')
    plt.show()

In [None]:
def color_color_red(catalog,xlim=[-5,5],ylim=[-5,5]):
    """Plot colour-colour maps whose hexagons are coloured by redshift.

    For each triplet (b0, b1, b2) of consecutive bands in u, g, r, i, z, y the
    panel shows b0-b1 on the x axis against b1-b2 on the y axis; the
    `catalog['redshift']` values are passed to hexbin as `C`.

    Parameters
    ----------
    catalog : mapping or DataFrame
        Indexed with `mag_<band>` for the six bands and with `redshift`; the
        magnitude values must support element-wise subtraction.
    xlim, ylim : two-element sequences, optional
        Lower and upper axis limits applied to every panel. The defaults are
        only read, never modified.

    The finished figure is saved once as 'colorColorRed_training_set.png' in
    the current working directory and then shown.
    """
    bands = ['u', 'g', 'r', 'i', 'z','y']
    plt.figure(figsize=(12,12))
    triplets = zip(bands, bands[1:], bands[2:])
    for panel, (bluer, middle, redder) in enumerate(triplets, start=1):
        plt.subplot(3,2,panel)
        x_colour = catalog[f'mag_{bluer}'] - catalog[f'mag_{middle}']
        y_colour = catalog[f'mag_{middle}'] - catalog[f'mag_{redder}']
        plt.hexbin(x_colour, y_colour, C=catalog['redshift'], mincnt=1, cmap='turbo', gridsize=[400,200])
        # Labels follow the plotted data: the bluer colour is on x, the redder on y.
        plt.xlabel(f'{bluer}-{middle}',fontsize=13)
        plt.ylabel(f'{middle}-{redder}',fontsize=13)
        plt.colorbar(label='redshift')
        plt.xlim(xlim[0],xlim[1])
        plt.ylim(ylim[0],ylim[1])
    # Write the file once, after every panel has been drawn.
    plt.savefig('colorColorRed_training_set.png', format='png')
    plt.show()

In [None]:
def spatial_distribution(catalog):
    """Plot a 100x100-bin 2D histogram of the sky positions in `catalog`.

    Parameters
    ----------
    catalog : mapping or DataFrame
        Indexed with `ra` and `dec`; the axis labels assume degrees.

    The figure is saved as 'spatial_distribution.png' in the current working
    directory and then shown.
    """
    # Read the positions from the argument rather than from a notebook-level variable.
    plt.hist2d(catalog['ra'], catalog['dec'], bins=100)
    plt.xlabel('RA [deg]')
    plt.ylabel('Dec [deg]')
    plt.colorbar()
    plt.savefig('spatial_distribution.png', format='png')
    # Shown explicitly, like the other plotting helpers in this notebook.
    plt.show()

#### 2.2 Plots

Spatial distribution

In [None]:
# RA/Dec 2D histogram of the sky positions; the helper also writes spatial_distribution.png.
spatial_distribution(data_training_set)

Redshift distribution

In [None]:
# Normalised redshift histogram of the training set; the helper also writes redshift_training_set.png.
redshift_hist(data_training_set)

Errors

In [None]:
# Photometric error versus magnitude for the training set, whose `mag_<band>`
# columns hold the dereddened magnitudes after the rename above.
bands = ['u','g', 'r', 'i']
plt.figure(figsize=[18,4])
for panel, band in enumerate(bands, start=1):
    # Panels occupy the first four slots of a 1x5 grid.
    plt.subplot(1, 5, panel)
    selected = data_training_set.query(f'mag_{band} != 99. & magerr_{band} < 2.')
    plt.plot(
        selected[f'mag_{band}'],
        selected[f'magerr_{band}'],
        '.',
        alpha=0.3,
        color='steelblue',
    )
    plt.xlabel(f'mag {band}')
    # Only the left-most panel carries the y-axis label.
    if panel == 1:
        plt.ylabel('error')
    plt.xlim(16, 28)
    plt.ylim(0, 2)
    plt.tight_layout()

Magnitude distribution

In [None]:
# Per-band magnitude histograms with the default 'DP0.2' title; the helper also writes
# mag_histogram_training_set.png.
mag_histogram(data_training_set)

Mag-color

In [None]:
# Magnitude versus adjacent-band colour density maps; the helper also writes magColor_training_set.png.
mag_color(data_training_set)

Color-color

In [None]:
# Colour-colour density maps for consecutive band triplets; the helper also writes
# colorColor_training_set.png.
color_color(data_training_set)

In [None]:
# Same colour-colour panels with hexagons coloured by redshift, using the default
# [-5, 5] axis limits; the helper also writes colorColorRed_training_set.png.
color_color_red(data_training_set)