_Authors: Andreia Dourado, Bruno Moraes_

_Adapted from Melissa Graham's example notebook._

__Description: This is a first notebook to create a training set of galaxies using a random sample from the Rubin Science Platform DP0.2 data__


In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tables_io
import qp
import sys 
from matplotlib import gridspec

### 1. Reading the data

In [None]:
# Load the random-sample catalogue from CSV into a DataFrame; judging by the
# file name it is a DP0.2 object vs. DP0.1 truth cross-match (TODO confirm).
# NOTE(review): the path is absolute and user-specific; adjust it when
# running in another environment.
data = pd.read_csv("/home/andreia.dourado/data/xmatching_dp02_object_vs_dp01_truth_random_sample.csv")

In [None]:
# Display the loaded table as the notebook cell output.
data

In [None]:
# List the column names available in the loaded table.
data.columns

### 2. Analysing the training set

#### 2.1 Functions

In [None]:
def errors(catalog):
    """Plot magnitude error against magnitude for the u, g, r and i bands.

    The panels are drawn side by side in a 1x5 subplot grid (the fifth slot
    is left empty). For each band, only rows with ``mag_<band> != 99.`` and
    ``magerr_<band> < 2.`` are plotted.

    Parameters
    ----------
    catalog : pandas.DataFrame-like
        Table supporting ``.query(expr)`` and providing ``mag_<band>`` and
        ``magerr_<band>`` columns for each plotted band.
    """
    bands = ['u', 'g', 'r', 'i']
    plt.figure(figsize=[18, 4])
    for i, band in enumerate(bands):
        plt.subplot(1, 5, i + 1)
        # Evaluate the selection once per band and reuse it for both axes.
        selected = catalog.query(f'mag_{band} != 99. & magerr_{band} < 2.')
        plt.plot(selected[f'mag_{band}'],
                 selected[f'magerr_{band}'],
                 '.', alpha=0.3, color='steelblue')
        plt.xlabel(f'mag {band}')
        if i == 0:
            # Only the left-most panel carries the shared y-axis label.
            plt.ylabel('error')
        plt.xlim(16, 28)
        plt.ylim(0, 2)
    # Adjust the layout once, after every panel has been created.
    plt.tight_layout()
   

In [None]:
def mag_histogram(catalog, title='DP0.2'):
    """Draw one log-scaled magnitude histogram per band (u, g, r, i, z, y).

    The six panels are arranged in a 3x2 grid and share the same bin edges
    (57 edges from 9 to 37). The figure is saved to
    ``mag_histogram_training_set.png`` and then shown.

    Parameters
    ----------
    catalog : mapping of column name -> array-like
        Must provide a ``mag_<band>`` column for each of the six bands.
    title : str, optional
        Figure-level title passed to ``plt.suptitle``.
    """
    band_colors = {
        'u': 'blue',
        'g': 'green',
        'r': 'orange',
        'i': 'red',
        'z': 'purple',
        'y': 'gray',
    }
    plt.figure(figsize=(9, 13))
    edges = np.linspace(9, 37, 57)
    for panel, (band, shade) in enumerate(band_colors.items(), start=1):
        plt.subplot(3, 2, panel)
        plt.hist(
            catalog[f'mag_{band}'],
            bins=edges,
            histtype='stepfilled',
            color=shade,
            edgecolor="black",
            alpha=0.5,
            label=f'{band} band',
        )
        plt.yscale('log')
        plt.xlabel('mag', fontsize=13)
        plt.ylabel('counts', fontsize=13)
        plt.legend(loc=2)
        plt.grid(True)
    plt.suptitle(title)
    plt.savefig('mag_histogram_training_set.png', format='png')
    plt.show()

In [None]:
def redshift_hist(catalog):
    """Plot, save and show a density-normalised histogram of ``redshift``.

    Uses 200 evenly spaced bin edges between 0 and 3 and writes the figure to
    ``redshift_training_set.png``.
    """
    z_edges = np.linspace(0, 3, 200)
    z_values = catalog['redshift']
    plt.hist(z_values, bins=z_edges, density=True)
    plt.savefig('redshift_training_set.png', format='png')
    plt.show()

In [None]:
def mag_color(catalog):
    """Plot magnitude versus adjacent-band colour as log-scaled hexbin maps.

    For each consecutive band pair (u-g, g-r, r-i, i-z, z-y) one panel of a
    3x2 grid shows the bluer band's magnitude on x and the magnitude
    difference on y. The figure is saved to ``magColor_training_set.png`` and
    then shown.

    Parameters
    ----------
    catalog : mapping of column name -> array-like
        Must provide a ``mag_<band>`` column for each of u, g, r, i, z, y;
        columns must support element-wise subtraction.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    plt.figure(figsize=(9, 13))
    for panel, (band, next_band) in enumerate(zip(bands, bands[1:]), start=1):
        plt.subplot(3, 2, panel)
        mag = catalog[f'mag_{band}']
        colour = mag - catalog[f'mag_{next_band}']
        plt.hexbin(mag, colour, mincnt=1, cmap='Reds', gridsize=[400, 200], bins='log')
        plt.xlabel("mag " + band, fontsize=13)
        plt.ylabel(f"{band}-{next_band}", fontsize=13)
        plt.xlim(16, 32)
        plt.ylim(-2, 5)
        plt.grid(True)
    # Lay out and write the figure once, after all panels are drawn, instead
    # of re-saving the PNG on every loop iteration.
    plt.tight_layout()
    plt.savefig('magColor_training_set.png', format='png')
    plt.show()

In [None]:
def color_color(catalog):
    """Plot colour-colour diagrams for consecutive band triplets.

    For each triplet (b0, b1, b2) taken from u, g, r, i, z, y, one panel of a
    3x2 grid shows ``b0-b1`` on x and ``b1-b2`` on y as a log-scaled hexbin
    density map with its own colour bar. The figure is saved to
    ``colorColor_training_set.png`` and then shown.

    Parameters
    ----------
    catalog : mapping of column name -> array-like
        Must provide a ``mag_<band>`` column for each of the six bands;
        columns must support element-wise subtraction.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    plt.figure(figsize=(12, 12))
    triplets = zip(bands, bands[1:], bands[2:])
    for panel, (blue, mid, red) in enumerate(triplets, start=1):
        plt.subplot(3, 2, panel)
        x_colour = catalog[f'mag_{blue}'] - catalog[f'mag_{mid}']
        y_colour = catalog[f'mag_{mid}'] - catalog[f'mag_{red}']
        plt.hexbin(x_colour, y_colour, mincnt=1, cmap='turbo', gridsize=[400, 200], bins='log')
        # Each label names the colour actually plotted on that axis.
        plt.xlabel(f'{blue}-{mid}', fontsize=13)
        plt.ylabel(f'{mid}-{red}', fontsize=13)
        plt.colorbar()
        plt.xlim(-5, 5)
        plt.ylim(-5, 5)
    # Write the file once, after every panel has been drawn.
    plt.savefig('colorColor_training_set.png', format='png')
    plt.show()

In [None]:
def color_color_red(catalog, xlim=(-5, 5), ylim=(-5, 5)):
    """Plot colour-colour diagrams with hexagons coloured by redshift.

    For each consecutive band triplet (b0, b1, b2) taken from u, g, r, i, z,
    y, one panel of a 3x2 grid shows ``b0-b1`` on x and ``b1-b2`` on y. The
    ``redshift`` column is passed as hexbin's ``C`` values, so each hexagon's
    colour is the reduction of the redshifts falling in it (matplotlib's
    default reduction is the mean). The figure is saved to
    ``colorColorRed_training_set.png`` and then shown.

    Parameters
    ----------
    catalog : mapping of column name -> array-like
        Must provide ``redshift`` and a ``mag_<band>`` column for each of the
        six bands; columns must support element-wise subtraction.
    xlim, ylim : sequence of two numbers, optional
        Lower and upper axis limits applied to every panel.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    plt.figure(figsize=(12, 12))
    redshift = catalog['redshift']
    triplets = zip(bands, bands[1:], bands[2:])
    for panel, (blue, mid, red) in enumerate(triplets, start=1):
        plt.subplot(3, 2, panel)
        x_colour = catalog[f'mag_{blue}'] - catalog[f'mag_{mid}']
        y_colour = catalog[f'mag_{mid}'] - catalog[f'mag_{red}']
        plt.hexbin(x_colour, y_colour, C=redshift, mincnt=1, cmap='turbo', gridsize=[400, 200])
        # Each label names the colour actually plotted on that axis.
        plt.xlabel(f'{blue}-{mid}', fontsize=13)
        plt.ylabel(f'{mid}-{red}', fontsize=13)
        plt.colorbar(label='redshift')
        plt.xlim(xlim[0], xlim[1])
        plt.ylim(ylim[0], ylim[1])
    # Write the file once, after every panel has been drawn.
    plt.savefig('colorColorRed_training_set.png', format='png')
    plt.show()

In [None]:
def spatial_distribution(catalog):
    """Plot a 100x100-bin 2D histogram of sky positions and save it.

    The figure is written to ``spatial_distribution.png``.

    Parameters
    ----------
    catalog : mapping of column name -> array-like
        Must provide ``ra`` and ``dec`` columns (plotted on axes labelled in
        degrees).
    """
    # Use the catalogue passed in rather than the notebook-level ``data``.
    plt.hist2d(catalog['ra'], catalog['dec'], bins=100)
    plt.xlabel('RA [deg]')
    plt.ylabel('Dec [deg]')
    plt.colorbar()
    plt.savefig('spatial_distribution.png', format='png')

#### 2.2 Plots

Spatial distribution

In [None]:
# 2D histogram of RA/Dec; also writes spatial_distribution.png.
spatial_distribution(data)

Redshift distribution

In [None]:
# Normalised redshift histogram; also writes redshift_training_set.png.
redshift_hist(data)

Errors

In [None]:
# Magnitude error versus magnitude, one panel per plotted band.
errors(data)

Magnitude distribution

In [None]:
# Per-band magnitude histograms; also writes mag_histogram_training_set.png.
mag_histogram(data)

Mag-color

In [None]:
# Magnitude versus adjacent-band colour maps; also writes magColor_training_set.png.
mag_color(data)

Color-color

In [None]:
# Colour-colour density maps; also writes colorColor_training_set.png.
color_color(data)