In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

import numpy as np
import pandas as pd

sys.path.append('..')
from env_config import DATA_PATH
from utils import save_fits
from data import read_fits_to_pandas, get_mag_str, process_2df, merge_specialized_catalogs
from evaluation import *

# Catalog

In [3]:
# Classification catalog and the QSO photometric-redshift catalog, both located under DATA_PATH
ctlg_clf_path = os.path.join(
    DATA_PATH,
    'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann_clf_f-all__2020-06-08_17:07:35.fits',
)
ctlg_z_qso_path = os.path.join(
    DATA_PATH,
    'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann_z_f-all_spec-qso__2020-06-08_16:22:38.fits',
)

# Read each FITS file through the project helper
ctlg_clf = read_fits_to_pandas(ctlg_clf_path)
ctlg_z_qso = read_fits_to_pandas(ctlg_z_qso_path)

# Quick size check of the classification catalog
print(ctlg_clf.shape)

(45469955, 11)


In [4]:
# Copy the QSO photometric redshift and its standard deviation into the classification catalog.
# The assignment below pairs rows by index label and columns by position, so it is only correct
# when both catalogs list the same objects in the same order. Verify that first, because a
# mismatch would otherwise produce NaNs or redshifts attached to the wrong objects silently.
assert len(ctlg_clf) == len(ctlg_z_qso), 'Classification and redshift catalogs differ in length'
assert ctlg_clf.index.equals(ctlg_z_qso.index), 'Classification and redshift catalogs differ in index'
if 'ID' in ctlg_z_qso.columns:
    # Only possible when the redshift catalog carries object IDs
    assert np.array_equal(ctlg_clf['ID'].values, ctlg_z_qso['ID'].values), \
        'Classification and redshift catalogs are not in the same row order'

ctlg_clf[['QSO_Z_PHOTO', 'QSO_Z_PHOTO_STDDEV']] = ctlg_z_qso[['Z_PHOTO', 'Z_PHOTO_STDDEV']]

In [5]:
# Restrict the catalog to the columns listed below, in this order
catalog_columns = [
    'ID', 'RAJ2000', 'DECJ2000',
    'GALAXY_PHOTO', 'QSO_PHOTO', 'STAR_PHOTO',
    'QSO_Z_PHOTO', 'QSO_Z_PHOTO_STDDEV',
]
ctlg_clf = ctlg_clf[catalog_columns]

In [9]:
# Write the catalog to CSV; the DataFrame index is not written to the file
catalog_path = os.path.join(
    DATA_PATH,
    'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann__2020-06-08.csv',
)
ctlg_clf.to_csv(path_or_buf=catalog_path, index=False)

In [None]:
# Write the same catalog as a FITS file through the project helper
catalog_path = os.path.join(
    DATA_PATH,
    'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann__2020-06-08.fits',
)
save_fits(ctlg_clf, catalog_path)

In [3]:
# Load the previously exported CSV catalog back into a DataFrame
catalog_path = os.path.join(
    DATA_PATH,
    'KiDS/DR4/catalogs/KiDS_DR4_x_SDSS_DR14_ann__2020-06-08.csv',
)
catalog = pd.read_csv(filepath_or_buffer=catalog_path)

In [None]:
# Mark every row whose ID occurs more than once (keep=False flags all occurrences, not just repeats)
dups = ctlg_clf.duplicated('ID', keep=False)
# Number of rows involved in a duplicated ID
print(dups.sum())

# Experiment results

In [3]:
# Prediction files of the 2020-06 runs: classifier, QSO redshift model, galaxy redshift model
clf_preds_path = '../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_clf_f-all__2020-06-08_12:08:59.csv'
z_qso_preds_path = '../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_z_f-all_spec-qso__2020-06-06_10:08:06.csv'
z_galaxy_preds_path = '../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_z_f-all_spec-galaxy__2020-06-06_12:22:05.csv'

# Load each prediction table
preds_clf = pd.read_csv(clf_preds_path)
preds_z_qso = pd.read_csv(z_qso_preds_path)
preds_z_galaxy = pd.read_csv(z_galaxy_preds_path)

In [3]:
# Prediction files of the 2019-12 "test-magnitude" runs: classifier, QSO and galaxy redshift models.
# Running this cell rebinds preds_clf, preds_z_qso and preds_z_galaxy to these files.
clf_preds_path = '../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_f-all_test-magnitude__2019-12-11_17:54:01.csv'
z_qso_preds_path = '../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_f-all_test-magnitude_spec-qso__2019-12-12_12:33:02.csv'
z_galaxy_preds_path = '../outputs/exp_preds/KiDS_DR4_x_SDSS_DR14_ann_f-all_test-magnitude_spec-galaxy__2019-12-12_12:34:24.csv'

# Load each prediction table
preds_clf = pd.read_csv(clf_preds_path)
preds_z_qso = pd.read_csv(z_qso_preds_path)
preds_z_galaxy = pd.read_csv(z_galaxy_preds_path)

In [4]:
# Prefix the photometric redshift columns with the class of the model that produced them,
# so the QSO and galaxy estimates can live side by side in one table
qso_z_names = {'Z_PHOTO': 'QSO_Z_PHOTO', 'Z_PHOTO_STDDEV': 'QSO_Z_PHOTO_STDDEV'}
galaxy_z_names = {'Z_PHOTO': 'GALAXY_Z_PHOTO', 'Z_PHOTO_STDDEV': 'GALAXY_Z_PHOTO_STDDEV'}

preds_z_qso = preds_z_qso.rename(columns=qso_z_names)
preds_z_galaxy = preds_z_galaxy.rename(columns=galaxy_z_names)

In [5]:
# Join the classifier predictions with both redshift predictions on the object ID.
# Both joins use the default inner merge, so only IDs present in all three tables remain.
preds = (
    preds_clf
    .merge(preds_z_qso[['ID', 'QSO_Z_PHOTO', 'QSO_Z_PHOTO_STDDEV']], on=['ID'])
    .merge(preds_z_galaxy[['ID', 'GALAXY_Z_PHOTO', 'GALAXY_Z_PHOTO_STDDEV']], on=['ID'])
)

In [6]:
# Add KiDS columns: Z_B, CLASS_STAR and the band columns from the KiDS x SDSS cross-match
kids_x_sdss_path = os.path.join(DATA_PATH, 'KiDS/DR4/KiDS.DR4.x.SDSS.DR14.fits')
kids_columns = ['ID', 'Z_B', 'CLASS_STAR'] + BAND_COLUMNS
kids_x_sdss = read_fits_to_pandas(kids_x_sdss_path, kids_columns)

# Default inner merge on the object ID
preds = preds.merge(kids_x_sdss, on=['ID'])

In [7]:
# Add 6QZ spectroscopic classification, exposed under the column name '6QZ_id1'
kids_x_6qz_path = os.path.join(DATA_PATH, 'KiDS/DR4/KiDS.DR4.x.6QZ.fits')
kids_x_6qz = (
    read_fits_to_pandas(kids_x_6qz_path, columns=['ID', 'id1'])
    .rename(columns={'id1': '6QZ_id1'})
)

# Left join: every row of preds is kept, rows without a 6QZ match get missing values
preds = preds.merge(kids_x_6qz, on=['ID'], how='left')

In [8]:
# Add 2SLAQ cross-match columns, renamed so the KiDS identifier is 'ID' and the rest carry a 2SLAQ_ prefix
kids_x_2slaq_path = os.path.join(DATA_PATH, 'KiDS/DR4/KiDS.DR4.x.2SLAQ.QSO.fits')
slaq_names = {'ID_1': 'ID', 'ID_2a': '2SLAQ_ID', 'z': '2SLAQ_z', 'qual': '2SLAQ_qual'}
kids_x_2slaq = (
    read_fits_to_pandas(kids_x_2slaq_path, columns=['ID_1', 'ID_2a', 'z', 'qual'])
    .rename(columns=slaq_names)
)

# Left join: every row of preds is kept, rows without a 2SLAQ match get missing values
preds = preds.merge(kids_x_2slaq, on=['ID'], how='left')

In [9]:
# List the column names of the merged predictions table to confirm all joins added their columns
preds.columns

Index(['ID', 'CLASS', 'Z', 'GALAXY_PHOTO', 'QSO_PHOTO', 'STAR_PHOTO',
       'CLASS_PHOTO', 'test_subset', 'QSO_Z_PHOTO', 'QSO_Z_PHOTO_STDDEV',
       'GALAXY_Z_PHOTO', 'GALAXY_Z_PHOTO_STDDEV', 'Z_B', 'CLASS_STAR',
       'MAG_GAAP_u', 'MAG_GAAP_g', 'MAG_GAAP_r', 'MAG_GAAP_i', 'MAG_GAAP_Z',
       'MAG_GAAP_Y', 'MAG_GAAP_J', 'MAG_GAAP_H', 'MAG_GAAP_Ks', '6QZ_id1',
       '2SLAQ_ID', '2SLAQ_z', '2SLAQ_qual'],
      dtype='object')

In [10]:
# Display the merged predictions table for a visual check
preds

Unnamed: 0,ID,CLASS,Z,GALAXY_PHOTO,QSO_PHOTO,STAR_PHOTO,CLASS_PHOTO,test_subset,QSO_Z_PHOTO,QSO_Z_PHOTO_STDDEV,...,MAG_GAAP_i,MAG_GAAP_Z,MAG_GAAP_Y,MAG_GAAP_J,MAG_GAAP_H,MAG_GAAP_Ks,6QZ_id1,2SLAQ_ID,2SLAQ_z,2SLAQ_qual
0,KiDSDR4 J140309.161-003220.97,GALAXY,0.586899,0.998199,0.001349,0.000452,GALAXY,random,1.706644,2.128615,...,19.902178,19.536497,19.179390,18.897888,18.520370,18.231775,,,,
1,KiDSDR4 J120122.260+013321.61,GALAXY,0.611530,0.998256,0.001151,0.000593,GALAXY,random,1.818052,2.063952,...,20.053967,19.627922,19.296600,19.077909,18.716476,18.408485,,,,
2,KiDSDR4 J090847.596+023644.26,STAR,0.000335,0.000341,0.000794,0.998866,STAR,random,1.251606,1.455152,...,20.688025,20.653200,20.821135,20.965322,21.020964,21.594004,,,,
3,KiDSDR4 J104753.969+015329.49,STAR,0.000627,0.000396,0.000002,0.999602,STAR,random,2.491770,0.201033,...,16.938957,16.869883,16.933428,17.046371,17.305948,17.643507,,,,
4,KiDSDR4 J142240.933-001030.41,GALAXY,0.321658,0.999665,0.000136,0.000199,GALAXY,random,1.432612,1.747922,...,18.672117,18.324923,18.035532,17.747923,17.482862,17.215593,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24587,KiDSDR4 J133022.369+012549.16,GALAXY,0.769532,0.984839,0.006141,0.009020,GALAXY,top,1.740947,2.089746,...,20.982185,20.344814,19.993013,19.627550,19.190191,19.049488,,,,
24588,KiDSDR4 J133136.340+012548.43,GALAXY,0.672450,0.999627,0.000271,0.000101,GALAXY,top,1.930425,2.248334,...,20.502752,19.975573,19.608318,19.294392,18.831553,18.503931,,,,
24589,KiDSDR4 J133329.695+012144.09,GALAXY,0.625599,0.996864,0.002137,0.000999,GALAXY,top,1.272066,1.885458,...,20.523840,20.015177,19.674679,19.372662,19.090460,18.707203,,,,
24590,KiDSDR4 J133110.667+012334.28,GALAXY,0.675689,0.999282,0.000460,0.000257,GALAXY,top,2.179146,2.452188,...,20.323719,19.824060,19.519434,19.100039,18.611195,18.364756,,,,


In [11]:
# Save the merged predictions to the shared directory.
# index=False keeps the positional row index left over from the merges out of the file, so it
# does not come back as an extra "Unnamed: 0" column on reload; the catalog CSV export in this
# notebook is written the same way.
preds.to_csv(os.path.join(DATA_PATH, 'share/KiDS_DR4_x_SDSS_DR14_ann_f-all__2020-06-08.csv'), index=False)