In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the local `utils` / `data` helpers) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline  

import sys

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

sys.path.append('..')
from utils import *
from data import *

In [3]:
import matplotlib as mpl

# Figure resolution used for every plot rendered in this notebook.
mpl.rcParams.update({'figure.dpi': 100})

# Seaborn appearance: 'ticks' style first, then the cubehelix colour palette
# (same call order as before).
sns.set(style='ticks')
sns.set_palette('cubehelix')

In [4]:
import os

# Location of the input FITS catalogue (the file name suggests a KiDS DR4 x
# SDSS DR14 table). The KIDS_DR4_SDSS_PATH environment variable overrides the
# default, so the notebook can run on machines where the data lives elsewhere;
# when it is unset the original absolute path is used unchanged.
path = os.environ.get(
    'KIDS_DR4_SDSS_PATH',
    '/media/snakoneczny/data/KiDS/DR4/KiDS.DR4.x.SDSS.DR14.fits',
)

In [5]:
# Load the FITS file at `path` with the project helper. The cells below call
# `.dropna`, `.columns` and `.shape` on the result, so it is presumably a
# pandas DataFrame -- confirm against the helper's definition.
data = read_fits_to_pandas(path)

In [7]:
# Discard every row that has a NaN in any column returned by get_mags(),
# then renumber the index from zero.
data_no_na = data.dropna(subset=get_mags())
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 130777 (85.90%) left


In [9]:
# Discard rows where MAG_GAAP_r is NaN, then renumber the index from zero.
data_no_na = data.dropna(subset=['MAG_GAAP_r'])
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 149403 (98.13%) left


In [10]:
# Discard rows with a NaN in any of the g, r or i GAAP magnitude columns,
# then renumber the index from zero.
data_no_na = data.dropna(
    subset=['MAG_GAAP_g', 'MAG_GAAP_r', 'MAG_GAAP_i'],
)
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 149198 (98.00%) left


In [8]:
# Discard rows with a NaN in any of the u, g, r or i GAAP magnitude columns,
# then renumber the index from zero.
data_no_na = data.dropna(
    subset=['MAG_GAAP_u', 'MAG_GAAP_g', 'MAG_GAAP_r', 'MAG_GAAP_i'],
)
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 138570 (91.02%) left


In [14]:
# Discard rows where MAG_GAAP_Z is NaN, then renumber the index from zero.
cols = ['MAG_GAAP_Z']
data_no_na = data.dropna(subset=cols)
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 145508 (95.58%) left


In [13]:
# Discard rows where MAG_GAAP_Ks is NaN, then renumber the index from zero.
cols = ['MAG_GAAP_Ks']
data_no_na = data.dropna(subset=cols)
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 144597 (94.98%) left


In [19]:
# Discard rows with a NaN in any of the Z, Y, J, H or Ks GAAP magnitude
# columns, then renumber the index from zero.
cols = [
    'MAG_GAAP_Z',
    'MAG_GAAP_Y',
    'MAG_GAAP_J',
    'MAG_GAAP_H',
    'MAG_GAAP_Ks',
]
data_no_na = data.dropna(subset=cols)
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 143704 (94.39%) left


In [21]:
# Display the column labels of `data` to see which names are available
# for the NaN filters in the surrounding cells.
data.columns

Index(['ID', 'RAJ2000', 'DECJ2000', 'Flag', 'SG2DPHOT', 'MAG_GAAP_u',
       'MAG_GAAP_g', 'MAG_GAAP_r', 'MAG_GAAP_i', 'MAG_GAAP_Z', 'MAG_GAAP_Y',
       'MAG_GAAP_J', 'MAG_GAAP_H', 'MAG_GAAP_Ks', 'COLOUR_GAAP_u_g',
       'COLOUR_GAAP_g_r', 'COLOUR_GAAP_r_i', 'COLOUR_GAAP_i_Z',
       'COLOUR_GAAP_Z_Y', 'COLOUR_GAAP_Y_J', 'COLOUR_GAAP_J_H',
      dtype='object')

In [22]:
# Discard rows with a NaN in any of the listed g/r/i magnitude columns or
# GAAP colour columns, then renumber the index from zero.
cols = [
    'MAG_GAAP_g',
    'MAG_GAAP_r',
    'MAG_GAAP_i',
    'COLOUR_GAAP_g_r',
    'COLOUR_GAAP_r_i',
    'COLOUR_GAAP_i_Z',
    'COLOUR_GAAP_Z_Y',
    'COLOUR_GAAP_Y_J',
    'COLOUR_GAAP_J_H',
    'COLOUR_GAAP_H_Ks',
]
data_no_na = data.dropna(subset=cols)
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 140839 (92.51%) left


In [17]:
# Discard rows with a NaN in any column returned by get_mags() or
# get_colors(), then renumber the index from zero.
cols = get_mags() + get_colors()
data_no_na = data.dropna(subset=cols)
data_no_na = data_no_na.reset_index(drop=True)

# Report how many rows survive and what share of `data` that is.
n_left = len(data_no_na)
p_left = n_left / len(data) * 100
print('Droping NANs: {} ({:.2f}%) left'.format(n_left, p_left))

Droping NANs: 130777 (85.90%) left


In [None]:


    # NOTE(review): this cell is an indented function body with no visible
    # `def` line. `with_print`, `BANDS_DR3` and `IMA_FLAGS` are not defined in
    # the visible cells (presumably they come from the star imports or from a
    # missing function signature -- confirm). As written, the cell cannot be
    # executed on its own because of the indentation and the bare `return`.

    # Row-selection mask with one entry per row of data_no_na, starting as
    # "keep everything".
    # NOTE(review): this is a plain Python list; the later `mask.sum()` calls
    # only work once `&=` with a pandas Series has replaced it by a pandas
    # object -- confirm this behaves as intended (a boolean array would be
    # the more conventional starting value).
    mask = [1] * data_no_na.shape[0]

    # Remove errors
    # Keep only rows whose MAGERR_GAAP_<band> value is below 1 for every band
    # listed in BANDS_DR3.
    for b in BANDS_DR3:
        mask &= (data_no_na['MAGERR_GAAP_{}'.format(b)] < 1)
    if with_print:
        n_left = mask.sum()
        # The percentage is relative to the row count of `data`, not data_no_na.
        p_left = mask.sum() / data.shape[0] * 100
        print('Removing errors bigger than 1: {} ({:.2f}%) left'.format(n_left, p_left))

    # Survey limiting magnitudes
    # Keep only rows strictly below the per-band thresholds given here.
    # NOTE(review): these column names use uppercase band letters (U, G, R, I),
    # whereas the `data.columns` output earlier in this notebook lists
    # lowercase ones (MAG_GAAP_u, ...). Presumably this fragment was written
    # for a different catalogue release -- confirm before running on this data.
    mask &= (
            (data_no_na['MAG_GAAP_U'] < 24.3) &
            (data_no_na['MAG_GAAP_G'] < 25.1) &
            (data_no_na['MAG_GAAP_R'] < 24.9) &
            (data_no_na['MAG_GAAP_I'] < 23.8)
    )
    if with_print:
        n_left = mask.sum()
        p_left = mask.sum() / data.shape[0] * 100
        print('Removing limiting magnitudes: {} ({:.2f}%) left'.format(n_left, p_left))

    # Remove flags
    # (disabled: would keep only rows where every column in FLAGS equals 0)
    # for c in FLAGS:
    #     mask &= (data_no_na[c] == 0)
    # if with_print: print('Removing flags: {} left'.format(mask.sum()))

    # Remove ima-flags
    # flag_mask selects the seven lowest bits (0b01111111 == 127); the highest
    # bit of an 8-bit flag value is ignored.
    flag_mask = 0b01111111
    for c in IMA_FLAGS:
        # `&` binds tighter than `==` in Python, so this evaluates
        # (flags & flag_mask) == 0: keep rows with none of the selected bits set.
        mask &= (data_no_na[c] & flag_mask == 0)
    if with_print:
        n_left = mask.sum()
        p_left = mask.sum() / data.shape[0] * 100
        print('Removing IMA flags: {} ({:.2f}%) left'.format(n_left, p_left))

    # Tile flag
    # (disabled: would keep only rows where TILE_FLAG equals 0)
    # mask &= (data_no_na['TILE_FLAG'] == 0)
    # if with_print: print('Removing tile flag: {} left'.format(mask.sum()))

    # Return the rows of data_no_na selected by the mask, with a fresh 0..n-1 index.
    return data_no_na.loc[mask].reset_index(drop=True)