In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

import numpy as np
import pandas as pd

sys.path.append('..')
from env_config import DATA_PATH
from utils import save_fits
from data import read_fits_to_pandas, get_mag_str, process_2df
from evaluation import *

# Catalog

In [3]:
kids = read_fits_to_pandas(os.path.join(DATA_PATH, 'KiDS/DR4/KiDS.DR4.mag-pos.fits'),
                           columns=['ID', 'MAG_GAAP_r', 'RAJ2000', 'DECJ2000'])
print(kids.shape)

(100350873, 4)


In [4]:
ctlg_clf = read_fits_to_pandas(os.path.join(DATA_PATH, 'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann_clf_f-all__2020-02-19_17:01:04.fits'))
ctlg_z_qso = read_fits_to_pandas(os.path.join(DATA_PATH, 'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann_z_f-all_spec-qso__2020-02-20_13:31:58.fits'))

In [6]:
catalog = ctlg_clf.copy()
catalog[['QSO_Z_PHOTO', 'QSO_Z_PHOTO_STDDEV']] = ctlg_z_qso[['Z_PHOTO', 'Z_PHOTO_STDDEV']]
print(catalog.shape)

(45383661, 8)


In [7]:
# Add columns from KiDS, e.g. r magnitude for number counts, class star for middle values analysis
catalog = catalog.merge(kids, on='ID')
print(catalog.shape)

(45383663, 11)


In [9]:
# Save CSV
catalog_path = os.path.join(DATA_PATH, 'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann_f-all__2020-02-19.csv')
catalog.to_csv(catalog_path, index=False)

In [3]:
# Read CSV
catalog_path = os.path.join(DATA_PATH, 'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann_f-all__2020-02-19.csv')
catalog = pd.read_csv(catalog_path)

In [None]:
# Save FITS
catalog_path = os.path.join(DATA_PATH, 'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann_f-all__2020-02-19.fits')
save_fits(catalog, catalog_path)

In [8]:
dups = catalog.duplicated(subset=['ID'], keep=False)
print(dups.sum())

4


# Experiment results

In [3]:
# Read experiment results
preds_clf = pd.read_csv('../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_f-all_test-magnitude__2019-12-11_17:54:01.csv')
preds_z_qso = pd.read_csv('../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_f-all_test-magnitude_spec-qso__2019-12-12_12:33:02.csv')
preds_z_galaxy = pd.read_csv('../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_f-all_test-magnitude_spec-galaxy__2019-12-12_12:34:24.csv')

In [4]:
# Add models class name to photometric redshift columns
preds_z_qso = preds_z_qso.rename(columns={'Z_PHOTO': 'QSO_Z_PHOTO', 'Z_PHOTO_STDDEV': 'QSO_Z_PHOTO_STDDEV'})
preds_z_galaxy = preds_z_galaxy.rename(columns={'Z_PHOTO': 'GALAXY_Z_PHOTO', 'Z_PHOTO_STDDEV': 'GALAXY_Z_PHOTO_STDDEV'})

In [5]:
# Merge predictions
preds = preds_clf.merge(preds_z_qso[['ID', 'QSO_Z_PHOTO', 'QSO_Z_PHOTO_STDDEV']], on=['ID'])
preds = preds.merge(preds_z_galaxy[['ID', 'GALAXY_Z_PHOTO', 'GALAXY_Z_PHOTO_STDDEV']], on=['ID'])

In [6]:
# Add KiDS coolumns
kids_x_sdss = read_fits_to_pandas(os.path.join(DATA_PATH, 'KiDS/DR4/KiDS.DR4.x.SDSS.DR14.fits'),
                                      ['ID', 'Z_B', 'CLASS_STAR'] + BAND_COLUMNS)
preds = preds.merge(kids_x_sdss, on=['ID'])

In [7]:
# Add 6QZ spectroscopic classification
kids_x_6qz = read_fits_to_pandas(os.path.join(DATA_PATH, 'KiDS/DR4/KiDS.DR4.x.6QZ.fits'), columns = ['ID', 'id1'])
kids_x_6qz = kids_x_6qz.rename(columns={'id1': '6QZ_id1'})
preds = preds.merge(kids_x_6qz, on=['ID'], how='left')

In [8]:
# Add 2SLAQ
kids_x_2slaq = read_fits_to_pandas(os.path.join(DATA_PATH, 'KiDS/DR4/KiDS.DR4.x.2SLAQ.QSO.fits'),
                                   columns = ['ID_1', 'ID_2a', 'z', 'qual'])
kids_x_2slaq = kids_x_2slaq.rename(columns={'ID_1': 'ID', 'ID_2a': '2SLAQ_ID', 'z': '2SLAQ_z', 'qual': '2SLAQ_qual'})
preds = preds.merge(kids_x_2slaq, on=['ID'], how='left')

In [9]:
preds.columns

Index(['ID', 'CLASS', 'Z', 'GALAXY_PHOTO', 'QSO_PHOTO', 'STAR_PHOTO',
       'CLASS_PHOTO', 'test_subset', 'QSO_Z_PHOTO', 'QSO_Z_PHOTO_STDDEV',
       'GALAXY_Z_PHOTO', 'GALAXY_Z_PHOTO_STDDEV', 'Z_B', 'CLASS_STAR',
       'MAG_GAAP_u', 'MAG_GAAP_g', 'MAG_GAAP_r', 'MAG_GAAP_i', 'MAG_GAAP_Z',
       'MAG_GAAP_Y', 'MAG_GAAP_J', 'MAG_GAAP_H', 'MAG_GAAP_Ks', '6QZ_id1',
       '2SLAQ_ID', '2SLAQ_z', '2SLAQ_qual'],
      dtype='object')

In [10]:
preds

Unnamed: 0,ID,CLASS,Z,GALAXY_PHOTO,QSO_PHOTO,STAR_PHOTO,CLASS_PHOTO,test_subset,QSO_Z_PHOTO,QSO_Z_PHOTO_STDDEV,...,MAG_GAAP_i,MAG_GAAP_Z,MAG_GAAP_Y,MAG_GAAP_J,MAG_GAAP_H,MAG_GAAP_Ks,6QZ_id1,2SLAQ_ID,2SLAQ_z,2SLAQ_qual
0,KiDSDR4 J114956.822+010846.96,GALAXY,0.534207,0.992363,0.007456,0.000181,GALAXY,top,2.019143,0.693261,...,20.362633,19.751524,19.483482,19.181873,18.666264,18.375544,,,,
1,KiDSDR4 J121345.818+014824.72,GALAXY,0.397798,0.987499,0.012218,0.000283,GALAXY,top,1.350140,0.689672,...,20.616844,20.086584,19.787380,19.384949,18.983282,18.702948,,,,
2,KiDSDR4 J115314.320+022725.71,GALAXY,0.661457,0.993560,0.006257,0.000183,GALAXY,top,1.390835,0.700925,...,20.321697,19.846567,19.577238,19.268234,18.947994,18.624397,,,,
3,KiDSDR4 J090736.028-004255.08,GALAXY,0.603146,0.999455,0.000533,0.000012,GALAXY,top,1.303888,0.708975,...,20.130110,19.642719,19.315298,18.897224,18.564531,18.221378,,,,
4,KiDSDR4 J103918.220-015517.74,GALAXY,0.728640,0.997348,0.002603,0.000050,GALAXY,top,1.269409,0.712185,...,20.068989,19.529633,19.101192,18.753311,18.242176,17.823534,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23358,KiDSDR4 J092330.207+020858.53,GALAXY,0.279085,0.901458,0.096288,0.002254,GALAXY,random,0.627324,0.330334,...,18.861637,18.438211,18.415812,18.130932,17.846735,17.573195,,,,
23359,KiDSDR4 J105155.143-005337.52,GALAXY,0.048573,0.999918,0.000068,0.000013,GALAXY,random,0.698886,0.362417,...,18.150942,18.044594,17.905134,17.832495,17.745382,17.942240,,,,
23360,KiDSDR4 J150654.525-004138.93,GALAXY,0.518560,0.986343,0.013217,0.000440,GALAXY,random,0.742388,0.359006,...,20.172960,19.736998,19.417065,19.174036,18.766550,18.421511,,,,
23361,KiDSDR4 J122554.288-014703.56,QSO,2.605478,0.000002,0.999986,0.000013,QSO,random,2.496545,0.392068,...,19.736294,19.956964,19.549042,19.768286,19.727482,19.641542,QSO,,,


In [11]:
# Save
preds.to_csv(os.path.join(DATA_PATH, 'share/KiDS_DR4_x_SDSS_DR14_ann_f-all_test-magnitude__2019-12-11_17:54:01.csv'))