# 

<img align="left" src = https://linea.org.br/wp-content/themes/LIneA/imagens/logo-header.jpg width=100 style="padding: 20px"> 

<img align="left" src = https://project.lsst.org/sites/default/files/Rubin-O-Logo_0.png width=160 style="padding: 20px">  

# QA DP0.2

**Contact author**: Heloisa da Silva Mengisztki ([heloisasmengisztki@gmail.com](mailto:heloisasmengisztki@gmail.com)) 

**Last verified run**: 2023-05-01 (YYYY-MM-DD) <br><br><br>

This notebook is a quality analysis of the dp0.2 data.

## Imports

In [None]:
import time
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

pd.set_option('display.max_rows', 20)

%matplotlib inline

In [None]:
# Matplotlib single-letter colour code used to draw each photometric band.
# The insertion order (u, g, r, i, z, y) is the order in which the per-band
# loops in this notebook visit the bands.
banda_cor = dict(
    u="r",
    g="m",
    r="b",
    i="c",
    z="g",
    y="y",
)

## Reading file SSH ?
## Reading file from Apollo

## Reading DP0.2 data - TAP - Rubin Science

### Tap service

In [None]:
# Obtain the TAP client through the Rubin Science Platform helper package.
from lsst.rsp import get_tap_service

service = get_tap_service()

# Sanity checks: a service object came back and it targets the expected TAP endpoint.
assert service is not None
assert service.baseurl == "https://data.lsst.cloud/api/tap"

For this step, we are going to read 1k galaxies; then we are going to use them to try running bpz_rail. Here we use the coordinate and radius values from the TAP tutorial notebook of the Rubin Science Platform.

In [None]:
# Upper bound on the number of rows returned; passed as `maxrec` to `service.search`.
max_rec = 1000
# Centre of the search circle, spliced verbatim into the ADQL CIRCLE(...) clause.
# Presumably "ra, dec" in degrees (ICRS) — TODO confirm.
use_center_coords = "62, -37"
# Radius of the search circle, also spliced verbatim into CIRCLE(...).
# Presumably degrees — TODO confirm.
use_radius = "1.0"

In [None]:
# Photometric bands requested from the Object table.
bands = ['g', 'i', 'r', 'u', 'y', 'z']

# One "<AB magnitude alias>, <flux error>, " fragment per band. Every fragment
# keeps its trailing ", " so the next column can be appended directly.
mags = "".join(
    f"scisql_nanojanskyToAbMag({band}_cModelFlux) AS mag_{band}_cModel, {band}_cModelFluxErr, "
    for band in bands
)

# Full SELECT column list; the trailing space separates it from the FROM clause.
columns_query = "objectId, " + mags + "coord_ra, coord_dec "

In [None]:
# ADQL cone search on the DP0.2 Object table: primary detections flagged as
# extended in r, with an r-band cModel magnitude strictly between 17 and 23.
# Adjacent literals are concatenated by the parser, so each clause keeps its
# own trailing space.
query = (
    f"SELECT {columns_query}"
    "FROM dp02_dc2_catalogs.Object "
    f"WHERE CONTAINS(POINT('ICRS', coord_ra, coord_dec), CIRCLE('ICRS', {use_center_coords}, {use_radius})) = 1 "
    "AND detect_isPrimary = 1 "
    "AND r_extendedness = 1 "
    "AND scisql_nanojanskyToAbMag(r_cModelFlux) > 17.0 "
    "AND scisql_nanojanskyToAbMag(r_cModelFlux) < 23.0 "
)
print(query)

In [None]:
%%time
# Run the query through the TAP service, capped at max_rec rows.
results = service.search(query, maxrec=max_rec)
print(type(results))
# `results` is rebound to the table form of the query result; the two prints
# show the type before and after the conversion.
results = results.to_table()
print(type(results))
# DataFrame used by every analysis cell that follows.
results_pd = results.to_pandas()
# "deep" asks pandas to introspect object columns for the memory figure.
results_pd.info(memory_usage="deep")

In [None]:
# Preview the first rows of the query result.
results_pd.head()

## Métricas base

População: nesse caso a população seria todos os objetos classificados como galáxias no catálogo simulado DP0.2 (DC2), ou seja, toda a base de dados

Amostra: nesse caso pode ser ou o arquivo que vamos ler direto do diretório ou os dados do TAP ou a amostra definida pela Melissa

Variáveis: nesse caso seriam as características por exemplo no TAP de limite de banda

### Medidas de tendência central

Média - valor que representa o “meio” dos dados

Mediana - o meio de um conjunto organizado 

Moda - o número que mais se repete no conjunto

In [None]:
# Central-tendency measures of the i-band magnitudes. `column`, `media`,
# `mediana` and `moda_round` are reused by the plotting cells further down.
column = results_pd["mag_i_cModel"]

media, mediana = column.mean(), column.median()
# `mode()` returns every tied value, so `moda` is a Series rather than a scalar.
moda = column.mode()
# Mode after rounding to whole magnitudes; [0] picks the first of any ties.
moda_round = column.round().mode()[0]

for rotulo, valor in (
    ("media: ", media),
    ("mediana: ", mediana),
    ("moda rounded: ", moda_round),
):
    print(rotulo, valor)
print("moda: ")
# Last expression of the cell, so the notebook displays the full Series.
moda

In [None]:
# Histogram of the i-band magnitudes, annotated with mean, median and rounded mode.
plt.figure(figsize=[8,8])
plt.suptitle("Distribuição da amostra DP 0.2", fontsize=18)

# bins="fd" selects the Freedman-Diaconis bin-width estimator.
plt.hist(column, bins="fd", color=str(banda_cor.get('i')))
plt.xlabel("magnitude (i_band)", fontsize=16)
plt.ylabel("qtd", fontsize=16)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

# One vertical marker per statistic, each drawn from 0 up to a fixed height of 130.
for valor, cor, rotulo in (
    (media, "r", 'media'),
    (mediana, "y", 'mediana'),
    (moda_round, "b", 'moda'),
):
    plt.vlines(valor, 0, 130, colors=cor, label=rotulo)

plt.legend(loc="upper left")
plt.tight_layout()

In [None]:
# One histogram panel per band (2x3 grid), each annotated with that band's own
# mean, median and rounded mode.
plt.figure(figsize=[16,10], dpi=300)
plt.suptitle("Distribuição da amostra DP 0.2 (em cada banda)", fontsize=16)

for pos_grafico, (key, value) in enumerate(banda_cor.items(), start=1):
    # NaN magnitudes, if any, would make the automatic "fd" bin-range detection
    # fail, so they are dropped before plotting.
    coluna = results_pd[f"mag_{key}_cModel"].dropna()

    # Band-local names, so the i-band `media` / `mediana` / `moda_round`
    # computed in the earlier cell are not overwritten.
    media_banda = coluna.mean()
    mediana_banda = coluna.median()
    moda_round_banda = coluna.round().mode()[0]

    plt.subplot(2,3,pos_grafico)
    # Plot this band's magnitudes (the previous version drew the i-band
    # `column` in every panel and labelled every x axis "i_band").
    plt.hist(coluna, bins="fd", color=value)
    plt.xlabel(f"magnitude ({key}_band)", fontsize=16)
    plt.ylabel("qtd", fontsize=16)
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.vlines(media_banda, 0, 130, colors="r", label='media')
    plt.vlines(mediana_banda, 0, 130, colors="y", label='mediana')
    plt.vlines(moda_round_banda, 0, 130, colors="b", label='moda')
    plt.legend(loc="upper left")

# Lay out the whole grid once, after every panel exists.
plt.tight_layout()

### Medidas de dispersão 

amplitude - diferença entre o maior número e o menor número de um conjunto. 

variância - mede a dispersão dos dados a partir dos quadrados dos desvios em relação à média

desvio padrão - indica quão uniforme é um conjunto de dados.

In [None]:
# Dispersion measures of the i-band magnitudes held in `column`.
maior, menor = column.max(), column.min()
amplitude = np.absolute(maior - menor)
# pandas computes the sample variance / standard deviation (ddof=1) by default.
variancia, desvio = column.var(), column.std()

for rotulo, valor in (
    ("amplitude: ", amplitude),
    ("variância: ", variancia),
    ("desvio padrão: ", desvio),
):
    print(rotulo, valor)

In [None]:
### Medidas separatrizes

Quartil 

Percentil

Decil

In [None]:
# TODO: the separatrix measures are not computed yet; the draft below is kept
# disabled. As written it would not give the intended result: `quantile()` is
# called without saying which quantile(s) to return, and `np.percentile` is
# called without its required percentile argument.
#quartil = column.quantile()
#percentile = np.percentile(column)
#decil = np.percentile(column)

#print("Quartil: ", quartil)
#print("Percentil:", percentile)
#print("Decil: ", decil)

## Plot - Localização amostra

In [None]:
# Scatter of the sample's coord_ra / coord_dec columns.
plt.figure(figsize=[8,8])
plt.suptitle("Localização da amostra DP 0.2", fontsize=18)

# "." is the point-marker format; a low alpha keeps dense regions readable.
plt.plot(results_pd.coord_ra, results_pd.coord_dec, ".", color="midnightblue", alpha=0.2)
plt.xlabel("RA", fontsize=18)
plt.ylabel("DEC", fontsize=18)
# Tick font size is set once per axis (a duplicated xticks call was removed).
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.tight_layout()

## Hist - Qtd Magnitude

In [None]:
def monta_grafico_hist(pos, key):
    """Draw the magnitude histogram of one band in a panel of a 2x3 grid.

    Reads the notebook globals ``results_pd`` and ``banda_cor`` and draws on
    the current matplotlib figure.

    Args:
        pos: 1-based index of the panel inside the 2x3 subplot grid.
        key: Band letter; selects the ``mag_<key>_cModel`` column and the
            bar colour looked up in ``banda_cor``.
    """
    plt.subplot(2, 3, pos)

    magnitudes = results_pd[f"mag_{key}_cModel"]
    cor = str(banda_cor.get(key))
    # bins="fd" selects the Freedman-Diaconis bin-width estimator.
    plt.hist(magnitudes, bins="fd", color=cor)

    plt.xlabel(f"magnitude ({key}_band)", fontsize=16)
    plt.ylabel("qtd", fontsize=16)
    for ajusta_ticks in (plt.xticks, plt.yticks):
        ajusta_ticks(fontsize=15)
    plt.tight_layout()

In [None]:
# 2x3 grid with one magnitude histogram per band.
plt.figure(figsize=[16,10], dpi=300)
plt.suptitle("Magnitude x Quantidade (em cada banda)", fontsize=16)

# Panels are numbered from 1 and follow the band order of banda_cor.
for pos_grafico, key in enumerate(banda_cor, start=1):
    monta_grafico_hist(pos_grafico, key)

## Plot - Magnitude x Erro 

In [None]:
# 2x3 grid plotting, for each band, the cModel flux error against the magnitude.
plt.figure(figsize=[16,10], dpi=300)
plt.suptitle("Magnitude x Erro (grizY)", fontsize=16)

for pos_grafico, (key, value) in enumerate(banda_cor.items(), start=1):
    plt.subplot(2,3,pos_grafico)

    magnitude = results_pd[f"mag_{key}_cModel"]
    erro_fluxo = results_pd[f"{key}_cModelFluxErr"]
    # Format string = the band's colour code followed by the "." point marker.
    plt.plot(magnitude, erro_fluxo, f"{value}.", alpha=0.5)

    plt.xlabel(f"magnitude ({key}-band)", fontsize=14)
    plt.ylabel("erro", fontsize=14)
    # Axis limits are left to matplotlib's automatic scaling.
    plt.tight_layout()

## Plot S/N x magnitude

In [None]:
def mag_sr(pos, key, x_min=15, x_max=30, y_min=0, y_max=60):
    """Plot the signal-to-noise ratio against magnitude for one band.

    Reads the notebook globals ``results_pd`` and ``banda_cor`` and draws in
    panel ``pos`` of a 2x3 grid on the current figure.

    The query only returns the magnitude and the flux error, so the flux is
    recovered by inverting the AB-magnitude conversion applied in the query
    (``scisql_nanojanskyToAbMag``: m = -2.5 * log10(f_nJy) + 31.4). The
    plotted quantity is flux / flux error; the previous version plotted
    1 / flux error, which is not a signal-to-noise ratio.

    Args:
        pos: 1-based index of the panel inside the 2x3 subplot grid.
        key: Band letter; selects ``mag_<key>_cModel`` and
            ``<key>_cModelFluxErr`` and the marker colour from ``banda_cor``.
        x_min, x_max, y_min, y_max: Accepted for interface compatibility but
            not applied to the axes.
            NOTE(review): confirm whether these limits should be applied.
    """
    plt.subplot(2,3,pos)

    magnitude = results_pd[f"mag_{key}_cModel"]
    # Flux in nanojansky, from the AB zero point used by the query conversion.
    fluxo = 10 ** ((31.4 - magnitude) / 2.5)
    sinal_ruido = fluxo / results_pd[f"{key}_cModelFluxErr"]

    plt.plot(magnitude, sinal_ruido, f"{banda_cor.get(key)}.", alpha=0.2, label=key)

    plt.xlabel(f"magnitude ({key}_band)", fontsize=16)
    plt.ylabel("S/R", fontsize=16)

    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

In [None]:
# 2x3 grid with one signal-to-noise panel per band.
plt.figure(figsize=[16,10], dpi=300)
plt.suptitle("Magnitude x Sinal Ruido (grizY)", fontsize=16)

bandas = list(banda_cor)

# (x_min, x_max, y_min, y_max) handed to mag_sr for each panel, listed in the
# same order as `bandas`.
limites = (
    (20, 28, 0, 15),
    (21, 27, 0, 30),
    (21, 26, 0, 30),
    (21, 26, 0, 30),
    (20, 26, 0, 30),
    (19, 26, 0, 20),
)
for indice, limite in enumerate(limites):
    mag_sr(indice + 1, bandas[indice], *limite)

plt.tight_layout()

## Hist - distribuição de cor

In [None]:
def qtd_cor(key1, key2):
    """Histogram the colour index ``key1 - key2`` on the current axes.

    Reads the notebook global ``results_pd``.

    Args:
        key1: Band letter of the minuend magnitude (``mag_<key1>_cModel``).
        key2: Band letter of the subtrahend magnitude (``mag_<key2>_cModel``).
    """
    cor = results_pd[f"mag_{key1}_cModel"] - results_pd[f"mag_{key2}_cModel"]
    plt.hist(cor, bins="fd", alpha=0.2, label=f"{key1}-{key2}")

    plt.xlabel(f"color ({key1}-{key2})", fontsize=16)
    plt.ylabel("qtd", fontsize=16)

    for ajusta_ticks in (plt.xticks, plt.yticks):
        ajusta_ticks(fontsize=16)

    # Reference line at zero colour, drawn from 0 up to a fixed height of 200.
    plt.vlines(0, 0, 200)

In [None]:
# One row of three colour histograms: g-r, r-i and i-z.
plt.figure(figsize=[16,5], dpi=300)
plt.suptitle("Quantidade x Cor", fontsize=16)

for posicao, (banda_a, banda_b) in enumerate((("g", "r"), ("r", "i"), ("i", "z")), start=1):
    plt.subplot(1,3,posicao)
    qtd_cor(banda_a, banda_b)

plt.tight_layout()

## Plot - cor cor

In [None]:
def cor_cor(key1, key2, key3, key4):
    """Draw a colour-colour diagram on the current axes.

    Reads the notebook globals ``results_pd`` and ``banda_cor``.

    Args:
        key1, key2: Band letters of the x-axis colour (``key1 - key2``);
            ``key1`` also selects the marker colour from ``banda_cor``.
        key3, key4: Band letters of the y-axis colour (``key3 - key4``).
    """
    eixo_x = results_pd[f"mag_{key1}_cModel"] - results_pd[f"mag_{key2}_cModel"]
    eixo_y = results_pd[f"mag_{key3}_cModel"] - results_pd[f"mag_{key4}_cModel"]
    plt.plot(eixo_x, eixo_y, f"{banda_cor.get(key1)}.", alpha=0.2)

    plt.xlabel(f"color ({key1}-{key2})", fontsize=16)
    plt.ylabel(f"color ({key3}-{key4})", fontsize=16)

    for ajusta_ticks in (plt.xticks, plt.yticks):
        ajusta_ticks(fontsize=16)

    # Zero-colour guide lines, each drawn over the fixed span -0.5 to 2.
    plt.vlines(0, -0.5, 2)
    plt.hlines(0, -0.5, 2)


In [None]:
# Single colour-colour diagram: (r - i) on the x axis against (g - r) on the y axis.
plt.figure(figsize=[16,10], dpi=300)
plt.suptitle("Cor x Cor", fontsize=16)

cor_cor("r", "i", "g", "r")

plt.tight_layout()